forked from theturtle32/WebSocket-Node
-
Notifications
You must be signed in to change notification settings - Fork 0
/
validation.cc
144 lines (123 loc) · 4.29 KB
/
validation.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
/*!
* UTF-8 Validation Code originally from:
* ws: a node.js websocket client
* Copyright(c) 2011 Einar Otto Stangvik <einaros@gmail.com>
* MIT Licensed
*/
#include <v8.h>
#include <node.h>
#include <node_buffer.h>
#include <node_object_wrap.h>
#include <stdlib.h>
#include <wchar.h>
#include <stdio.h>
using namespace v8;
using namespace node;
/*
 * Unicode code point boundaries used by the validator.
 *
 * UNI_SUR_HIGH_START .. UNI_SUR_LOW_END  inclusive surrogate range (U+D800..U+DFFF)
 * UNI_REPLACEMENT_CHAR                   U+FFFD
 * UNI_MAX_LEGAL_UTF32                    highest legal code point (U+10FFFF)
 *
 * Every expansion is wrapped in parentheses so the cast stays attached to its
 * literal wherever the macro is used (e.g. `sizeof UNI_MAX_LEGAL_UTF32`).
 */
#define UNI_SUR_HIGH_START ((uint32_t) 0xD800)
#define UNI_SUR_LOW_END ((uint32_t) 0xDFFF)
#define UNI_REPLACEMENT_CHAR ((uint32_t) 0x0000FFFD)
#define UNI_MAX_LEGAL_UTF32 ((uint32_t) 0x0010FFFF)
/*
 * Lookup table indexed by the first byte of a UTF-8 sequence; the entry is the
 * number of additional (trailing) bytes that the sequence is expected to have.
 *
 *   0x00..0xBF -> 0   (continuation bytes 0x80..0xBF also map to 0 here and
 *                      are rejected later by isLegalUTF8)
 *   0xC0..0xDF -> 1
 *   0xE0..0xEF -> 2
 *   0xF0..0xF7 -> 3
 *   0xF8..0xFB -> 4   (5-byte form; isLegalUTF8 rejects lengths above 4)
 *   0xFC..0xFF -> 5   (6-byte form; isLegalUTF8 rejects lengths above 4)
 */
static const uint8_t trailingBytesForUTF8[256] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
/*
 * Correction constants indexed by the number of trailing bytes (0..5).
 *
 * is_valid_utf8 accumulates the raw bytes of a sequence with repeated
 * "add byte, shift left by 6" steps, so the lead-byte prefix bits and the
 * 0x80 marker of every continuation byte end up inside the sum. Subtracting
 * the matching entry removes them and leaves the code point, e.g. for one
 * trailing byte: (0xC0 << 6) + 0x80 == 0x3080.
 * The last two entries are those sums truncated to 32 bits.
 */
static const uint32_t offsetsFromUTF8[6] = {
0x00000000, 0x00003080, 0x000E2080,
0x03C82080, 0xFA082080, 0x82082080
};
/*
 * Checks whether the `length` bytes starting at `source` form one well-formed
 * UTF-8 sequence under the RFC 3629 restrictions.
 *
 * source - pointer to the lead byte; `length` bytes must be readable.
 * length - total size of the sequence (lead byte plus trailing bytes).
 *
 * Returns 1 if the sequence is legal, 0 otherwise. Lengths outside 1..4 are
 * always rejected: RFC 3629 makes 5- and 6-byte UTF-8 illegal.
 *
 * The function does not check that `length` agrees with what the lead byte
 * announces; the caller derives `length` from the lead byte.
 */
static int isLegalUTF8(const uint8_t *source, const int length)
{
    if (length < 1 || length > 4) return 0;

    const uint8_t lead = source[0];

    /* Third and fourth bytes must be plain continuation bytes (0x80..0xBF). */
    for (int idx = length - 1; idx >= 2; --idx) {
        if (source[idx] < 0x80 || source[idx] > 0xBF) return 0;
    }

    if (length >= 2) {
        const uint8_t second = source[1];

        /*
         * The second byte must be a continuation byte for every lead byte.
         * Checking only the upper bound here would let 0xED / 0xF4 followed
         * by a byte below 0x80 pass, because their dedicated cases below only
         * test an upper limit.
         */
        if (second < 0x80 || second > 0xBF) return 0;

        /* Lead-specific narrowing of the second byte's range. */
        switch (lead) {
        case 0xE0: if (second < 0xA0) return 0; break; /* overlong 3-byte form */
        case 0xED: if (second > 0x9F) return 0; break; /* U+D800..U+DFFF surrogates */
        case 0xF0: if (second < 0x90) return 0; break; /* overlong 4-byte form */
        case 0xF4: if (second > 0x8F) return 0; break; /* above U+10FFFF */
        default: break;
        }
    }

    /* 0x80..0xBF cannot start a sequence; 0xC0 and 0xC1 only encode overlong forms. */
    if (lead >= 0x80 && lead < 0xC2) return 0;

    /* Lead bytes above 0xF4 would encode values beyond U+10FFFF. */
    if (lead > 0xF4) return 0;

    return 1;
}
/*
 * Reports whether the `len` bytes at `value` are valid UTF-8.
 *
 * len   - number of bytes to examine.
 * value - pointer to the first byte; the data is only read.
 *
 * Returns 1 when every sequence in the buffer is accepted, 0 as soon as one
 * is rejected. A sequence whose announced trailing bytes would run past the
 * end of the buffer is rejected. An empty buffer yields 1.
 */
int is_valid_utf8 (size_t len, char *value)
{
    const uint8_t *bytes = reinterpret_cast<const uint8_t *>(value);
    size_t pos = 0;

    while (pos < len) {
        /* How many bytes follow this lead byte, according to the table. */
        const uint8_t trailing = trailingBytesForUTF8[bytes[pos]];

        /* The last byte of the sequence must still be inside the buffer. */
        if (pos + trailing >= len) {
            return 0;
        }

        /* Structural check of the whole sequence. */
        if (!isLegalUTF8(bytes + pos, trailing + 1)) {
            return 0;
        }

        /* Shift-add all bytes together; marker bits are removed afterwards. */
        uint32_t codepoint = 0;
        for (int consumed = 0; consumed < trailing; ++consumed) {
            codepoint += bytes[pos++];
            codepoint <<= 6;
        }
        codepoint += bytes[pos];
        codepoint -= offsetsFromUTF8[trailing];

        /* Reject values above the Unicode range and surrogate code points. */
        if (codepoint > UNI_MAX_LEGAL_UTF32) {
            return 0;
        }
        if (codepoint >= UNI_SUR_HIGH_START && codepoint <= UNI_SUR_LOW_END) {
            return 0;
        }

        ++pos; /* step past the final byte of this sequence */
    }

    return 1;
}
class Validation : public ObjectWrap
{
public:
static void Initialize(v8::Handle<v8::Object> target)
{
HandleScope scope;
Local<FunctionTemplate> t = FunctionTemplate::New(New);
t->InstanceTemplate()->SetInternalFieldCount(1);
NODE_SET_METHOD(t->GetFunction(), "isValidUTF8", Validation::IsValidUTF8);
target->Set(String::NewSymbol("Validation"), t->GetFunction());
}
protected:
static Handle<Value> New(const Arguments& args)
{
HandleScope scope;
Validation* validation = new Validation();
validation->Wrap(args.This());
return args.This();
}
static Handle<Value> IsValidUTF8(const Arguments& args)
{
HandleScope scope;
if (!Buffer::HasInstance(args[0])) {
return ThrowException(Exception::Error(String::New("First argument needs to be a buffer")));
}
Local<Object> buffer_obj = args[0]->ToObject();
char *buffer_data = Buffer::Data(buffer_obj);
size_t buffer_length = Buffer::Length(buffer_obj);
return is_valid_utf8(buffer_length, buffer_data) == 1 ? scope.Close(True()) : scope.Close(False());
}
};
/*
 * Module initialization function: opens a handle scope and lets
 * Validation::Initialize populate `target`.
 */
extern "C" void init (Handle<Object> target)
{
HandleScope scope;
Validation::Initialize(target);
}
/* Hands the module name `validation` and its init function to Node's NODE_MODULE macro. */
NODE_MODULE(validation, init);