forked from antoniolm23/lz78
-
Notifications
You must be signed in to change notification settings - Fork 0
/
hash.c
270 lines (208 loc) · 7.03 KB
/
hash.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
#include "hash.h"
//effective hash function
uint32_t lookup(const void *key, size_t length, uint32_t initval) {
//fprintf(stderr, "lookup function\n");
uint32_t a,b,c;
const uint8_t *k;
const uint32_t *data32Bit;
data32Bit = key;
a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
while (length > 12) {
a += *(data32Bit++);
b += *(data32Bit++);
c += *(data32Bit++);
mix(a,b,c);
length -= 12;
}
k = (const uint8_t *)data32Bit;
switch (length) {
case 12: c += ((uint32_t)k[11])<<24;
case 11: c += ((uint32_t)k[10])<<16;
case 10: c += ((uint32_t)k[9])<<8;
case 9 : c += k[8];
case 8 : b += ((uint32_t)k[7])<<24;
case 7 : b += ((uint32_t)k[6])<<16;
case 6 : b += ((uint32_t)k[5])<<8;
case 5 : b += k[4];
case 4 : a += ((uint32_t)k[3])<<24;
case 3 : a += ((uint32_t)k[2])<<16;
case 2 : a += ((uint32_t)k[1])<<8;
case 1 : a += k[0];
break;
case 0 : return c;
}
final(a,b,c);
return c;
}
//list passed by reference, the insertion is always on the top of the list
int list_ins(int pos, int father, unsigned int c, collision_elem** list) {
//fprintf(stderr, "list_insertion %i %i\n", pos, father);
collision_elem* p, *q, *t;
//create a new element
p=(collision_elem*)malloc(sizeof(collision_elem));
p->elem_pos=pos;
p->elem_father=father;
p->next=NULL;
//insert in the top of the list
q=t=*list;
*list=p;
p->next=q;
return true;
}
//deleting of the list
void list_del(collision_elem* list) {
//fprintf(stderr, "list deletion\n");
if(list==NULL) return;
collision_elem* p, *q;
p=list;
q=p;
while(q->next!=NULL) {
p=q;
q=q->next;
free(p);
}
list=NULL;
}
//search in the list
int list_search(int index, int father, unsigned int symbol, ht_table* table, int* flag){
collision_elem* e=table->ht_array[index].next;
int pos;
//if the list is empty set the flag and return 0
if(e==NULL) {
*flag=NOT_FOUND;
return 0;
}
//scroll the list to see if there is the element
while(e!=NULL) {
//if the father is equal access in the position of the dictionary to see if the sons are equal
if(e->elem_father==father) {
pos=e->elem_pos;
if(table->ht_array[pos].ht_symbol==symbol) {
*flag=FOUND;
return pos;
}
}
e=e->next;
}
*flag=NOT_FOUND;
return 0;
}
//initialization of the dictioanry
void hash_init(int size, int symbols, ht_table* table) {
int i; //iterator
int y;
table->ht_array=malloc((sizeof(ht_entry))*size);
for(i = 0; i < symbols; i++) {
y=i;
table->ht_array[y].ht_father = ROOT;
table->ht_array[y].ht_symbol = i;
table->ht_array[y].label = i;
table->ht_array[y].next = NULL;
}
//Add EOFC to the table
table->ht_array[symbols].ht_father = ROOT;
table->ht_array[symbols].ht_symbol = EOFC;
table->ht_array[symbols].label = EOFC;
table->ht_array[symbols].next = NULL;
//fprintf(stderr, "end initialize first part\n");
symbols++;
//initialize the remaining part of the table
for(i = symbols; i < size; i++) {
table->ht_array[i].ht_symbol = EMPTY_ENTRY;
table->ht_array[i].ht_father = EMPTY_ENTRY;
table->ht_array[i].label=EMPTY_ENTRY;
table->ht_array[i].next= NULL;
}
//update all the other fields of the ht_table structure
table->next_free_entry=symbols;
table->total_size=size;
table->next_label=symbols;
//fprintf(stderr, "init: %i, %i, %i\n", table->next_free_entry,table->total_size, table->actual_size);
}
//hash function, returns the position
int hash(int father, int size) {
//fprintf(stderr, "hash calling\n");
unsigned int position;
unsigned int initval=12345;
//hash computation using the lookup function
position=lookup(&father, sizeof(int), initval);
//fprintf(stderr, "end hash calling %i\n", (position%SIZE));
return (position%size);
}
int hash_search(int* father,unsigned int child, ht_table* table) {
int index=0, pos=0; //used as indexes in the hash table
int tmp=*father; //temporary variable to be used as input of the hash function
int new_flag; //new flag to be used in case we have to search in the collision list
//construct the input of the hash function
tmp=tmp<<8;
tmp+=child;
index=hash(tmp, table->total_size);
//fprintf(stderr, "index: %i\n", index);
//check if the entry is empty
if(table->ht_array[index].ht_father==EMPTY_ENTRY && table->ht_array[index].ht_symbol==EMPTY_ENTRY)
//fprintf(stderr, "emptypos %i\n", index);
return index;
//check if the entry is occupied by the same symbol
if(table->ht_array[index].ht_father==(*father) && table->ht_array[index].ht_symbol==child) {
//fprintf(stderr, "yourpos\n");
*father=table->ht_array[index].label; //the new father is the pointer to the child node
return 0;
}
//search in the collision list
pos=list_search(index, *father, child, table, &new_flag);
if(new_flag==FOUND) {
*father=table->ht_array[pos].label; //the new father is the pointer to the child node
return 0;
}
//the entry is occupied by another symbol thus we have a collision, return the index, it's up
//to the insertion to manage collisions
return index;
}
//add a new child in the hash table
int hash_add(int index, int father, unsigned int symbol, ht_table* table) {
int i; //simple iterator
//int index; used to reach a position in the hash table
//index=hash_search(father, symbol, table, flag);
//fprintf(stderr, "flag: %i", *flag);
//if the position is occupied by another string then we have to manage collison
if(table->ht_array[index].ht_father != EMPTY_ENTRY) {
list_ins(table->next_free_entry, father, symbol, &table->ht_array[index].next);
index=table->next_free_entry;
for(i=table->next_free_entry; i<table->total_size; i++) {
if(table->ht_array[i].ht_father==EMPTY_ENTRY &&
table->ht_array[i].ht_symbol==EMPTY_ENTRY){
//fprintf(stderr, "\n\t\t\tnew free:%i\n\n\n", i);
table->next_free_entry=i;
break;
}
}
}
//now we can do the insertion
table->ht_array[index].ht_father=father;
table->ht_array[index].ht_symbol=symbol;
table->ht_array[index].label=table->next_label;
table->ht_array[index].next=NULL;
//now we did the insertion, we need to compute some statistics to see if we need a new table
table->next_label++;
//ratio=(double)(table->actual_size)/(double)(table->total_size);
//fprintf(stderr, "%f %f\n", ratio, max_r);
if(table->next_label>table->total_size) {
//fprintf(stderr, "full_dict\n");
return -1;
}
//fprintf(stderr, "hash_add: %i\n", table->ht_array[index].ht_father);
//all went ok, there is space we can continue using this dictionary
return 1;
}
//suppress the hash table
void hash_suppress(ht_table* table) {
int i;
//free the memory allocated to the collision lists
for(i=0; i<table->total_size; i++)
list_del(table->ht_array[i].next);
//free the memory allocated to the table
free(table->ht_array);
//reset the parameters of the table
table->next_label=0;
table->total_size=0;
}