# Creating and summarizing a correlation matrix with daru and statsample

## This notebook also serves as a demonstration of `Daru.lazy_update` and the `clone: false` option of `Daru::DataFrame.new`

In [1]:
require 'statsample'

# Registers an analysis named "Statsample::Bivariate.correlation_matrix" that
# builds a DataFrame of four random-normal vectors, displays its head, and
# displays the correlation matrix of its vectors. The analysis is executed by
# the `run_batch` call after the block.
Statsample::Analysis.store("Statsample::Bivariate.correlation_matrix") do
  # Daru::Vector and Daru::DataFrame normally update metadata, such as the
  # positions of missing values, every time they are created.
  #
  # Since we don't have any missing values in the data that we are creating,
  # we set Daru.lazy_update = true so that missing data is not updated every
  # time and things happen much faster.
  #
  # In case you do have missing data and lazy_update has been set to *true*,
  # you _SHOULD_ call `#update` on the concerned Vector or DataFrame object
  # every time an assignment or deletion cycle is complete.
  Daru.lazy_update = true

  begin
    # Create a Daru::DataFrame containing 4 vectors a, b, c and d.
    #
    # Notice that the `clone` option has been set to *false*. This tells Daru
    # to not clone the Daru::Vectors being supplied by `rnorm`, since cloning
    # them would be needless work: nothing else holds on to these vectors
    # once they have been assigned to the dataframe.
    samples = 1000
    ds = Daru::DataFrame.new({
      :a => rnorm(samples),
      :b => rnorm(samples),
      :c => rnorm(samples),
      :d => rnorm(samples)
    }, clone: false)

    puts "== DataFrame ==\n"
    IRuby.display ds.head

    # Calculate the correlation matrix of the dataframe's vectors.
    cm = Statsample::Bivariate.correlation_matrix(ds)

    puts "\n== Correlation Matrix ==\n"
    IRuby.display cm
  ensure
    # Daru.lazy_update is a global switch. Reset it in an `ensure` clause so
    # that it is turned off even when something above raises; otherwise a
    # failed analysis would leave lazy updating enabled for code elsewhere.
    Daru.lazy_update = false
  end
end

# Run every analysis registered with Statsample::Analysis.store.
Statsample::Analysis.run_batch

"if(window['d3'] === undefined ||\n   window['Nyaplot'] === undefined){\n    var path = {\"d3\":\"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min\",\"downloadable\":\"https://cdn.rawgit.com/domitry/d3-downloadable/master/d3-downloadable\"};\n\n\n\n    var shim = {\"d3\":{\"exports\":\"d3\"},\"downloadable\":{\"exports\":\"downloadable\"}};\n\n    require.config({paths: path, shim:shim});\n\n\nrequire(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');require(['downloadable'], function(downloadable){window['downloadable']=downloadable;console.log('finished loading downloadable');\n\n\tvar script = d3.select(\"head\")\n\t    .append(\"script\")\n\t    .attr(\"src\", \"https://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\")\n\t    .attr(\"async\", true);\n\n\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\n\n\n\t    var event = document.createEvent(\"HTMLEvents\");\n\t    event.initEvent(\"load_nyaplot\",false,false);\n\t    w

== DataFrame ==



Daru::DataFrame:13085860 rows: 10 cols: 4,Daru::DataFrame:13085860 rows: 10 cols: 4,Daru::DataFrame:13085860 rows: 10 cols: 4,Daru::DataFrame:13085860 rows: 10 cols: 4,Daru::DataFrame:13085860 rows: 10 cols: 4
Unnamed: 0_level_1,a,b,c,d
0,0.9011467513657144,-2.069826499630964,-0.0542418856753842,-0.2149753067446775
1,0.3477390529339665,1.9693799988325165,0.6438210000373529,0.2691070769303308
2,1.169337960514499,-1.2003500655322563,-1.8876472885285305,-0.2125026233669801
3,-2.0149242228225352,0.3777196093249942,0.233921550979305,0.3997912152405825
4,-0.8464428241042591,-1.4782182304742937,0.2245871172511823,-0.3339626361855191
5,1.2471920549543476,-0.887490499184695,-1.1041991362454315,1.932482592873003
6,-0.8529853403070782,-0.4909368945208435,-0.7306423975841205,0.8578541353085531
7,-0.3847054532477001,0.4812878996742971,0.565122377348464,-1.277608770535443
8,1.2856180977412772,-1.288566320945822,-1.7868746145697003,1.0500431060289053
9,0.5182738763941376,0.363459330228789,-0.4630107709322867,0.4544283438001362



== Correlation Matrix ==



Matrix[[1.0, -0.01467367745077135, 0.011913969445518504, -0.0034212506073735397], [-0.01467367745077135, 1.0, -0.04653177857574501, 0.022525025891610012], [0.011913969445518504, -0.04653177857574501, 1.0, 0.031949296919919545], [-0.0034212506073735397, 0.022525025891610012, 0.031949296919919545, 1.0]]

Analysis 2016-03-24 11:58:04 +0000
= Statsample::Bivariate.correlation_matrix

