forked from scikit-learn/scikit-learn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
weight_vector.pyx
127 lines (105 loc) · 3.74 KB
/
weight_vector.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# encoding: utf-8
# cython: cdivision=True
# cython: boundscheck=False
# cython: wraparound=False
#
# Author: Peter Prettenhofer <peter.prettenhofer@gmail.com>
#
# License: BSD Style.
import numpy as np
cimport numpy as np
cimport cython
cdef class WeightVector(object):
    """Dense vector represented by a scalar and a numpy array.

    The class provides methods to ``add`` a sparse vector
    and scale the vector.
    Representing a vector explicitly as a scalar times a
    vector allows for efficient scaling operations.

    Attributes
    ----------
    w : ndarray, dtype=np.float64, order='C'
        The numpy array which backs the weight vector.
    w_data_ptr : np.float64*
        A pointer to the data of the numpy array.
    wscale : double
        The scale of the vector.
    n_features : int
        The number of features (= dimensionality of ``w``).
    sq_norm : double
        The squared norm of ``w``.
    """
    # NOTE(review): the attribute declarations and the C-level typedefs
    # ``DOUBLE`` / ``INTEGER`` (and the ``sqrt`` used in ``norm``) are not
    # visible in this file — presumably they live in the companion ``.pxd``
    # declaration file; verify there.

    def __cinit__(self, np.ndarray[DOUBLE, ndim=1, mode='c'] w):
        # Keep a reference to the array (prevents deallocation) and cache a
        # raw pointer to its C-contiguous data for fast indexed access.
        self.w = w
        self.w_data_ptr = <DOUBLE *>w.data
        self.wscale = 1.0
        self.n_features = w.shape[0]
        # sq_norm tracks ||wscale * w||^2; wscale is 1.0 here so it is just
        # the dot product of w with itself.
        self.sq_norm = np.dot(w, w)

    cdef void add(self, DOUBLE *x_data_ptr, INTEGER *x_ind_ptr,
                  int xnnz, double c):
        """Scales example x by constant c and adds it to the weight vector.

        This operation updates ``sq_norm``.

        Parameters
        ----------
        x_data_ptr : double*
            The array which holds the feature values of ``x``.
        x_ind_ptr : np.int32*
            The array which holds the feature indices of ``x``.
        xnnz : int
            The number of non-zero features of ``x``.
        c : double
            The scaling constant for the example.
        """
        cdef int j
        cdef int idx
        cdef double val
        cdef double innerprod = 0.0
        cdef double xsqnorm = 0.0
        # the next two lines save a factor of 2!
        # (hoisting the attribute loads out of the loop avoids repeated
        # per-iteration lookups of self.wscale / self.w_data_ptr)
        cdef double wscale = self.wscale
        cdef DOUBLE* w_data_ptr = self.w_data_ptr
        for j in range(xnnz):
            idx = x_ind_ptr[j]
            val = x_data_ptr[j]
            # Accumulate <w, x> and ||x||^2 in the same pass as the update.
            innerprod += (w_data_ptr[idx] * val)
            xsqnorm += (val * val)
            # The true vector is wscale * w, so adding c*x to it means
            # adding (c / wscale) * x to the backing array w.
            w_data_ptr[idx] += val * (c / wscale)
        # ||w' + c*x||^2 = ||w'||^2 + c^2*||x||^2 + 2*c*<w', x>
        # where w' = wscale * w, hence the wscale factor on the cross term
        # (innerprod was computed against the unscaled backing array).
        self.sq_norm += (xsqnorm * c * c) + (2.0 * innerprod * wscale * c)

    cdef double dot(self, DOUBLE *x_data_ptr, INTEGER *x_ind_ptr, int xnnz):
        """Computes the dot product of a sample x and the weight vector.

        Parameters
        ----------
        x_data_ptr : double*
            The array which holds the feature values of ``x``.
        x_ind_ptr : np.int32*
            The array which holds the feature indices of ``x``.
        xnnz : int
            The number of non-zero features of ``x``.

        Returns
        -------
        innerprod : double
            The inner product of ``x`` and ``w``.
        """
        cdef int j
        cdef int idx
        cdef double innerprod = 0.0
        cdef DOUBLE* w_data_ptr = self.w_data_ptr
        for j in range(xnnz):
            idx = x_ind_ptr[j]
            innerprod += w_data_ptr[idx] * x_data_ptr[j]
        # Apply the deferred scale once at the end instead of per element.
        innerprod *= self.wscale
        return innerprod

    cdef void scale(self, double c):
        """Scales the weight vector by a constant ``c``.

        It updates ``wscale`` and ``sq_norm``. If ``wscale`` gets too
        small we call ``reset_wscale``."""
        self.wscale *= c
        self.sq_norm *= (c * c)
        # Fold the scale back into the array before it underflows; the
        # represented vector wscale * w is unchanged by reset_wscale.
        if self.wscale < 1e-9:
            self.reset_wscale()

    cdef void reset_wscale(self):
        """Scales each coef of ``w`` by ``wscale`` and resets it to 1. """
        # sq_norm needs no update: w *= wscale together with wscale = 1.0
        # leaves the product wscale * w (and thus its norm) unchanged.
        self.w *= self.wscale
        self.wscale = 1.0

    cdef double norm(self):
        """The L2 norm of the weight vector. """
        # Cheap: sq_norm is maintained incrementally by add() and scale().
        return sqrt(self.sq_norm)