-
Notifications
You must be signed in to change notification settings - Fork 17
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
implement delta matrix in rust and move to use incidence matrix #595
base: master
Are you sure you want to change the base?
Changes from 13 commits
615fc27
b1ef4a1
fa17331
cbbb241
89d6c98
33940a5
abdfff5
f3e5f01
ace5c2b
5c9f4d8
81f5a8d
d8b72a7
4937899
f5ac937
4cf3f0d
7111fc4
70e02f0
a25da1c
b53a776
eeb387b
d6332b1
5ab1b5a
f1b57ab
2ae6ed8
def1a16
ebfe445
09d1b7a
903c00a
4f340b4
a558020
7ebafeb
b5d14f7
a58a624
f8b3713
41ec2d2
a05199b
2aa805f
fec36c1
980eb5a
3bcf342
b7b5dda
93088e6
e7deba3
96508e8
144d546
d04f36e
81e21f6
7e1dbab
fdcf95e
23d5056
eb46032
929bf84
43308cf
7be62f0
4173e3c
72966f8
5834e29
1be8df0
0fe7f41
ad90708
621705a
b589b9e
c84f9bf
b7082bc
f5f92bd
ed1d2fa
e47b26b
118b556
0b24001
37bf3fb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,15 +40,15 @@ static int* _BulkInsert_ReadHeaderLabels | |
ASSERT(data != NULL); | ||
ASSERT(data_idx != NULL); | ||
|
||
// first sequence is entity label(s) | ||
const char* labels = data + *data_idx; | ||
int labels_len = strlen(labels); | ||
*data_idx += labels_len + 1; | ||
// first sequence is entity label(s) | ||
const char* labels = data + *data_idx; | ||
int labels_len = strlen(labels); | ||
*data_idx += labels_len + 1; | ||
|
||
// array of all label IDs | ||
int* label_ids = array_new(int, 1); | ||
// stack variable to contain a single label | ||
char label[labels_len + 1]; | ||
// array of all label IDs | ||
int* label_ids = array_new(int, 1); | ||
// stack variable to contain a single label | ||
char label[labels_len + 1]; | ||
|
||
while (true) { | ||
// look for a colon delimiting another label | ||
|
@@ -82,7 +82,7 @@ static int* _BulkInsert_ReadHeaderLabels | |
if (!found) break; | ||
} | ||
|
||
return label_ids; | ||
return label_ids; | ||
} | ||
|
||
// read the property keys from a header | ||
|
@@ -99,15 +99,15 @@ static AttributeID* _BulkInsert_ReadHeaderProperties | |
ASSERT(data_idx != NULL); | ||
ASSERT(prop_count != NULL); | ||
|
||
// next 4 bytes are property count | ||
*prop_count = *(uint*)&data[*data_idx]; | ||
*data_idx += sizeof(unsigned int); | ||
// next 4 bytes are property count | ||
*prop_count = *(uint*)&data[*data_idx]; | ||
*data_idx += sizeof(unsigned int); | ||
|
||
if (*prop_count == 0) return NULL; | ||
if (*prop_count == 0) return NULL; | ||
|
||
AttributeID* prop_indices = rm_malloc(*prop_count * sizeof(AttributeID)); | ||
AttributeID* prop_indices = rm_malloc(*prop_count * sizeof(AttributeID)); | ||
|
||
// the rest of the line is [char *prop_key] * prop_count | ||
// the rest of the line is [char *prop_key] * prop_count | ||
Comment on lines
+102
to
+110
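There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Memory leak potential in `_BulkInsert_ReadHeaderProperties`. The function allocates `prop_indices` with `rm_malloc`; if an error occurs after the allocation, the buffer should be freed before returning. Suggested change:
- return prop_indices;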
+ if (error_condition) {
+ rm_free(prop_indices);
+ return NULL;
+ } else {
+ return prop_indices;
+ }
Tools
GitHub Check: codecov/patch
|
||
for (uint j = 0; j < *prop_count; j++) { | ||
char* prop_key = (char*)data + *data_idx; | ||
*data_idx += strlen(prop_key) + 1; | ||
|
@@ -116,7 +116,7 @@ static AttributeID* _BulkInsert_ReadHeaderProperties | |
prop_indices[j] = GraphContext_FindOrAddAttribute(gc, prop_key, NULL); | ||
} | ||
|
||
return prop_indices; | ||
return prop_indices; | ||
} | ||
|
||
// read an SIValue from the data stream and update the index appropriately | ||
|
@@ -125,7 +125,7 @@ static SIValue _BulkInsert_ReadProperty | |
const char* data, | ||
size_t* data_idx | ||
) { | ||
// binary property format: | ||
// binary property format: | ||
// - property type : 1-byte integer corresponding to TYPE enum | ||
// - Nothing if type is NULL | ||
// - 1-byte true/false if type is boolean | ||
|
@@ -134,16 +134,16 @@ static SIValue _BulkInsert_ReadProperty | |
// - Null-terminated C string if type is string | ||
// - 8-byte array length followed by N values if type is array | ||
|
||
// possible property values | ||
bool b; | ||
double d; | ||
int64_t i; | ||
int64_t len; | ||
const char* s; | ||
// possible property values | ||
bool b; | ||
double d; | ||
int64_t i; | ||
int64_t len; | ||
const char* s; | ||
|
||
SIValue v = SI_NullVal(); | ||
TYPE t = data[*data_idx]; | ||
*data_idx += 1; | ||
SIValue v = SI_NullVal(); | ||
TYPE t = data[*data_idx]; | ||
*data_idx += 1; | ||
|
||
switch (t) { | ||
case BI_NULL: | ||
|
@@ -191,7 +191,7 @@ static SIValue _BulkInsert_ReadProperty | |
break; | ||
} | ||
|
||
return v; | ||
return v; | ||
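There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Validate property types in `_BulkInsert_ReadProperty`. The switch statement in `_BulkInsert_ReadProperty` should handle unknown property types with a `default` case. Suggested change:
+ default: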
+ // Handle unknown property type
+ ASSERT(false && "Unknown property type encountered");
+ break;
Tools
GitHub Check: codecov/patch
|
||
} | ||
|
||
static int _BulkInsert_ProcessNodeFile | ||
|
@@ -200,30 +200,30 @@ static int _BulkInsert_ProcessNodeFile | |
const char* data, | ||
size_t data_len | ||
) { | ||
uint prop_count; | ||
size_t data_idx = 0; | ||
|
||
// read the CSV file header labels and update all schemas | ||
int* label_ids = _BulkInsert_ReadHeaderLabels(gc, SCHEMA_NODE, data, &data_idx); | ||
uint label_count = array_len(label_ids); | ||
// read the CSV header properties and collect their indices | ||
AttributeID* prop_indices = _BulkInsert_ReadHeaderProperties(gc, SCHEMA_NODE, data, | ||
uint prop_count; | ||
size_t data_idx = 0; | ||
|
||
// read the CSV file header labels and update all schemas | ||
int* label_ids = _BulkInsert_ReadHeaderLabels(gc, SCHEMA_NODE, data, &data_idx); | ||
uint label_count = array_len(label_ids); | ||
// read the CSV header properties and collect their indices | ||
AttributeID* prop_indices = _BulkInsert_ReadHeaderProperties(gc, SCHEMA_NODE, data, | ||
&data_idx, &prop_count); | ||
|
||
// sync each matrix once | ||
ASSERT(Graph_GetMatrixPolicy(gc->g) == SYNC_POLICY_RESIZE); | ||
// sync each matrix once | ||
ASSERT(Graph_GetMatrixPolicy(gc->g) == SYNC_POLICY_RESIZE); | ||
|
||
for (uint i = 0; i < label_count; i++) { | ||
Graph_GetLabelMatrix(gc->g, label_ids[i]); | ||
} | ||
|
||
// sync node-label matrix | ||
Graph_GetNodeLabelMatrix(gc->g); | ||
Graph_SetMatrixPolicy(gc->g, SYNC_POLICY_NOP); | ||
// sync node-label matrix | ||
Graph_GetNodeLabelMatrix(gc->g); | ||
Graph_SetMatrixPolicy(gc->g, SYNC_POLICY_NOP); | ||
|
||
//-------------------------------------------------------------------------- | ||
// load nodes | ||
//-------------------------------------------------------------------------- | ||
//-------------------------------------------------------------------------- | ||
// load nodes | ||
//-------------------------------------------------------------------------- | ||
|
||
while (data_idx < data_len) { | ||
Node n = GE_NEW_NODE(); | ||
|
@@ -240,11 +240,11 @@ static int _BulkInsert_ProcessNodeFile | |
} | ||
} | ||
|
||
Graph_SetMatrixPolicy(gc->g, SYNC_POLICY_RESIZE); | ||
if (prop_indices) rm_free(prop_indices); | ||
array_free(label_ids); | ||
Graph_SetMatrixPolicy(gc->g, SYNC_POLICY_RESIZE); | ||
if (prop_indices) rm_free(prop_indices); | ||
array_free(label_ids); | ||
|
||
return BULK_OK; | ||
return BULK_OK; | ||
} | ||
|
||
static int _BulkInsert_ProcessEdgeFile | ||
|
@@ -253,31 +253,33 @@ static int _BulkInsert_ProcessEdgeFile | |
const char* data, | ||
size_t data_len | ||
) { | ||
int relation_id; | ||
uint prop_count; | ||
size_t data_idx = 0; | ||
int relation_id; | ||
uint prop_count; | ||
size_t data_idx = 0; | ||
|
||
// read the CSV file header | ||
// and commit all labels and properties it introduces | ||
int* type_ids = _BulkInsert_ReadHeaderLabels(gc, SCHEMA_EDGE, data, &data_idx); | ||
uint type_count = array_len(type_ids); | ||
// read the CSV file header | ||
// and commit all labels and properties it introduces | ||
int* type_ids = _BulkInsert_ReadHeaderLabels(gc, SCHEMA_EDGE, data, &data_idx); | ||
uint type_count = array_len(type_ids); | ||
|
||
// edges can only have one type | ||
ASSERT(type_count == 1); | ||
// edges can only have one type | ||
ASSERT(type_count == 1); | ||
|
||
int type_id = type_ids[0]; | ||
AttributeID* prop_indices = _BulkInsert_ReadHeaderProperties(gc, SCHEMA_EDGE, | ||
int type_id = type_ids[0]; | ||
AttributeID* prop_indices = _BulkInsert_ReadHeaderProperties(gc, SCHEMA_EDGE, | ||
data, &data_idx, &prop_count); | ||
|
||
// sync matrix once | ||
ASSERT(Graph_GetMatrixPolicy(gc->g) == SYNC_POLICY_RESIZE); | ||
Graph_GetRelationMatrix(gc->g, type_id, false); | ||
Graph_GetAdjacencyMatrix(gc->g, false); | ||
Graph_SetMatrixPolicy(gc->g, SYNC_POLICY_NOP); | ||
// sync matrix once | ||
ASSERT(Graph_GetMatrixPolicy(gc->g) == SYNC_POLICY_RESIZE); | ||
Graph_GetRelationMatrix(gc->g, type_id, false); | ||
Graph_GetSourceRelationMatrix(gc->g, type_id, false); | ||
Graph_GetTargetRelationMatrix(gc->g, type_id, false); | ||
Graph_GetAdjacencyMatrix(gc->g, false); | ||
Graph_SetMatrixPolicy(gc->g, SYNC_POLICY_NOP); | ||
|
||
//-------------------------------------------------------------------------- | ||
// load edges | ||
//-------------------------------------------------------------------------- | ||
//-------------------------------------------------------------------------- | ||
// load edges | ||
//-------------------------------------------------------------------------- | ||
|
||
while (data_idx < data_len) { | ||
Edge e; | ||
|
@@ -305,11 +307,11 @@ static int _BulkInsert_ProcessEdgeFile | |
} | ||
} | ||
|
||
array_free(type_ids); | ||
if (prop_indices) rm_free(prop_indices); | ||
Graph_SetMatrixPolicy(gc->g, SYNC_POLICY_RESIZE); | ||
array_free(type_ids); | ||
if (prop_indices) rm_free(prop_indices); | ||
Graph_SetMatrixPolicy(gc->g, SYNC_POLICY_RESIZE); | ||
|
||
return BULK_OK; | ||
return BULK_OK; | ||
} | ||
|
||
static int _BulkInsert_ProcessTokens | ||
|
@@ -330,7 +332,7 @@ static int _BulkInsert_ProcessTokens | |
ASSERT(rc == BULK_OK); | ||
} | ||
|
||
return BULK_OK; | ||
return BULK_OK; | ||
} | ||
|
||
int BulkInsert | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Improve error handling in label processing.
The function `_BulkInsert_ReadHeaderLabels` processes labels but does not robustly handle potential errors such as memory allocation failures or incorrect label formats. Consider adding error handling mechanisms to manage such scenarios gracefully.
Tools
GitHub Check: codecov/patch