public
Description: New development on the Ruby bindings for the GNU Scientific Library
Clone URL: git://github.com/codahale/ruby-gsl.git
Refactored using function pointers (oo!) and added Dice's coefficient.
codahale (author)
Tue Feb 26 01:24:51 -0800 2008
commit  29523247ad9ca964127be051339a367bf010c9ae
tree    30fd87aba117ebcca28407fdb3d6e03840a0bbd2
parent  be62c914937c4f59f3c172b43667a93708493100
...
10
11
12
 
 
13
14
15
16
17
18
 
 
 
 
19
20
21
22
23
24
 
25
26
 
27
28
29
30
31
 
32
33
 
34
35
36
...
41
42
43
44
 
45
46
47
48
49
50
51
 
52
53
54
 
 
 
 
 
55
56
57
58
59
 
 
 
 
 
 
 
 
 
 
60
 
 
 
 
 
 
 
 
 
61
62
63
64
65
66
 
67
68
...
10
11
12
13
14
15
16
 
 
 
 
17
18
19
20
21
22
23
24
 
 
25
26
 
27
28
29
30
31
 
32
33
 
34
35
36
37
...
42
43
44
 
45
46
47
 
 
 
 
 
48
49
50
 
51
52
53
54
55
56
 
 
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
0
@@ -10,27 +10,28 @@
0
 struct array_comparison {
0
   long intersection_size;
0
   long union_size;
0
+ long a_size;
0
+ long b_size;
0
 };
0
 
0
-static struct array_comparison compare_arrays(VALUE a, VALUE b) {
0
- size_t a_size = RARRAY(a)->len;
0
- size_t b_size = RARRAY(b)->len;
0
- struct array_comparison result;
0
+typedef double (* fun_array_cmp)(const struct array_comparison);
0
+
0
+static VALUE compare_arrays(VALUE a, VALUE b, fun_array_cmp cmp) {
0
+ struct array_comparison result = { 0, 0, RARRAY(a)->len, RARRAY(b)->len };
0
   long * long_a;
0
   long * long_b;
0
   int i, j;
0
   
0
- result.intersection_size = 0;
0
- result.union_size = a_size + b_size;
0
+ result.union_size = result.a_size + result.b_size;
0
   
0
- if((a_size > 0) && (b_size > 0))
0
+ if((result.a_size > 0) && (result.b_size > 0))
0
   {
0
     COPYRUBYHASHARRAY(a, long_a);
0
     COPYRUBYHASHARRAY(b, long_b);
0
     
0
- for(i = 0; i < a_size; ++i)
0
+ for(i = 0; i < result.a_size; ++i)
0
     {
0
- for(j = 0; j < b_size; ++j)
0
+ for(j = 0; j < result.b_size; ++j)
0
       {
0
         if(long_a[i] == long_b[j])
0
         {
0
@@ -41,27 +42,45 @@ static struct array_comparison compare_arrays(VALUE a, VALUE b) {
0
     
0
   }
0
   
0
- return result;
0
+ return rb_float_new((*cmp)(result));
0
 }
0
 
0
-// Calculates the Tanimoto coefficient between two sets.
0
-static VALUE Similarity_tanimoto_coefficient(VALUE self, VALUE data1, VALUE data2) {
0
- struct array_comparison cmp = compare_arrays(data1, data2);
0
- double result = 0.0;
0
-
0
+double tanimoto_coefficient(const struct array_comparison cmp) {
0
   if(cmp.union_size > 0)
0
   {
0
- result = cmp.intersection_size / (double)(cmp.union_size - cmp.intersection_size);
0
+ return (cmp.intersection_size / (double)(cmp.union_size - cmp.intersection_size));
0
+ }
0
+ else
0
+ {
0
+ return 0.0;
0
   }
0
-
0
- return rb_float_new(result);
0
 }
0
 
0
+double dice_coefficient(const struct array_comparison cmp) {
0
+ if(cmp.union_size > 0)
0
+ {
0
+ return (2 * cmp.intersection_size) / (double)(cmp.a_size + cmp.b_size);
0
+ }
0
+ else
0
+ {
0
+ return 0.0;
0
+ }
0
+}
0
 
0
+// Calculates the Tanimoto coefficient between two sets.
0
+static VALUE Similarity_tanimoto_coefficient(VALUE self, VALUE data1, VALUE data2) {
0
+ return compare_arrays(data1, data2, tanimoto_coefficient);
0
+}
0
+
0
+
0
+static VALUE Similarity_dice_coefficient(VALUE self, VALUE data1, VALUE data2) {
0
+ return compare_arrays(data1, data2, dice_coefficient);
0
+}
0
 
0
 VALUE rbgsl_mSimilarity;
0
 void Init_Similarity() {
0
   rbgsl_mSimilarity = rb_define_module_under(rbgsl_mGSL, "Similarity");
0
 
0
   rb_define_module_function(rbgsl_mSimilarity, "tanimoto_coefficient", Similarity_tanimoto_coefficient, 2);
0
+ rb_define_module_function(rbgsl_mSimilarity, "dice_coefficient", Similarity_dice_coefficient, 2);
0
 }
0
\ No newline at end of file
...
2
3
4
5
 
6
7
8
...
10
11
12
13
 
 
 
 
 
 
 
 
 
 
 
 
 
14
15
16
17
...
2
3
4
 
5
6
7
8
...
10
11
12
 
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
0
@@ -2,7 +2,7 @@ require File.join(File.dirname(__FILE__), "..", "spec_helper")
0
 
0
 describe "Similarity" do
0
   
0
- describe "calculating the Tanimoto difference" do
0
+ describe "calculating the Tanimoto coefficient" do
0
     it "should return 0.0 if either of the sets are empty" do
0
       GSL::Similarity.tanimoto_coefficient(%w{ shirt shoes pants socks}, []).should be_close(0.0, 0.0001)
0
       GSL::Similarity.tanimoto_coefficient([], %w{ shirt skirt shoes }).should be_close(0.0, 0.0001)
0
@@ -10,7 +10,19 @@ describe "Similarity" do
0
     end
0
     
0
     it "should return a float" do
0
- GSL::Similarity.tanimoto_coefficient(%w{ shirt shoes pants socks}, %w{ shirt skirt shoes }).should be_close(0.4, 0.0001)
0
+ GSL::Similarity.tanimoto_coefficient(%w{ shirt shoes pants socks }, %w{ shirt skirt shoes }).should be_close(0.4, 0.0001)
0
+ end
0
+ end
0
+
0
+ describe "calculating Dice's coefficient" do
0
+ it "should return 0.0 if either of the sets are empty" do
0
+ GSL::Similarity.dice_coefficient(%w{ shirt shoes pants socks}, []).should be_close(0.0, 0.0001)
0
+ GSL::Similarity.dice_coefficient([], %w{ shirt skirt shoes }).should be_close(0.0, 0.0001)
0
+ GSL::Similarity.dice_coefficient([], []).should be_close(0.0, 0.0001)
0
+ end
0
+
0
+ it "should return a float" do
0
+ GSL::Similarity.dice_coefficient(%w{ ni ig gh ht }, %w{ na ac ch ht }).should be_close(0.25, 0.0001)
0
     end
0
   end
0
 end
0
\ No newline at end of file

Comments

    No one has commented yet.