-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.c
183 lines (155 loc) · 5 KB
/
main.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#include "xxHash/xxhash.h"
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/queue.h>
#include <sys/tree.h>
/* Struct to represent a duplicate names */
struct DuplicateName {
char *fullName;
STAILQ_ENTRY(DuplicateName) entries;
};
/* Struct to represent a student node */
struct Node {
char *fullName;
char *firstName;
XXH64_hash_t hash;
RB_ENTRY(Node) entry;
STAILQ_HEAD(DuplicatesList, DuplicateName) duplicatesHead;
};
/* Function to compare two student nodes */
int node_cmp(struct Node *a, struct Node *b) {
if (a->hash < b->hash)
return -1;
else if (a->hash > b->hash)
return 1;
return 0;
}
RB_HEAD(Tree, Node) root = RB_INITIALIZER(&root);
RB_PROTOTYPE(Tree, Node, entry, node_cmp)
RB_GENERATE(Tree, Node, entry, node_cmp)
/* Function to parse the student file and build the student nodes */
void parseFile(char *fileName) {
FILE *fp = fopen(fileName, "r");
if (fp == NULL) {
puts("Could not open file\n");
exit(-1);
}
/* Check for file length */
fseek(fp, 0, SEEK_END);
int fileLength = ftell(fp);
rewind(fp);
if (fileLength == 0) {
puts("File is empty!");
exit(-1);
}
char *line = NULL;
size_t buffer_size = 0;
ssize_t line_length;
while ((line_length = getline(&line, &buffer_size, fp)) != -1) {
/* Find the position of the first space character */
char *firstSpace = strchr(line, ' ');
if (firstSpace == NULL) {
puts("No first name detected in this line!");
continue;
};
struct Node *student = malloc(sizeof(struct Node));
/* Dynamically allocate memory for the full name */
student->fullName = malloc((line_length + 1) * sizeof(char));
if (student->fullName == NULL) {
puts("Memory allocation failed.\n");
exit(-1);
}
/* Copy the full name into the struct */
strncpy(student->fullName, line, line_length + 1);
student->fullName[line_length - 1] = '\0';
/* Calculate length of the first name */
size_t firstNameLength = firstSpace - line;
/* Dynamically allocate memory for the first name */
student->firstName = malloc((firstNameLength + 1) * sizeof(char));
if (student->firstName == NULL) {
puts("Memory allocation failed.\n");
exit(-1);
}
/* Copy the first name into the struct */
strncpy(student->firstName, line, firstNameLength + 1);
student->firstName[firstNameLength] = '\0';
/* Create hash from the first name */
student->hash = XXH64(student->firstName, firstNameLength, 0);
/* Check if a node with the same key exists */
struct Node *existingNode = RB_FIND(Tree, &root, student);
if (existingNode == NULL) {
/* Initialize the duplicates list for this node */
STAILQ_INIT(&student->duplicatesHead);
RB_INSERT(Tree, &root, student);
continue;
}
/* Add fullName to duplicate list */
struct DuplicateName *duplicate = malloc(sizeof(struct DuplicateName));
if (duplicate == NULL) {
puts("Memory allocation failed.\n");
exit(-1);
}
/* Dynamically allocate memory for the duplicate name */
duplicate->fullName = malloc((line_length + 1) * sizeof(char));
strncpy(duplicate->fullName, line, line_length + 1);
duplicate->fullName[line_length - 1] = '\0';
STAILQ_INSERT_TAIL(&existingNode->duplicatesHead, duplicate, entries);
/* Remove memory from duplicate node structure*/
free(student->fullName);
free(student->firstName);
free(student);
}
fclose(fp);
free(line);
}
/* Function to print the student nodes and their duplicates */
void printStudents() {
struct Node *studentNode;
int count = 1;
RB_FOREACH(studentNode, Tree, &root) {
printf("\n|-_-_-_ Unique student: %d _-_-_-|\n", count);
printf("\tFull name: %s,\n\thash: %llu,\n\tFirst name: %s, \n",
studentNode->fullName, studentNode->hash, studentNode->firstName);
/* Print duplicates if any */
struct DuplicateName *duplicate;
STAILQ_FOREACH(duplicate, &studentNode->duplicatesHead, entries) {
printf("\t^--> First name duplicate: %s\n", duplicate->fullName);
}
count++;
}
}
/* Function to free the allocated memory */
void freeStudents() {
struct Node *var, *next;
for (var = RB_MIN(Tree, &root); var != NULL; var = next) {
next = RB_NEXT(Tree, &root, var);
RB_REMOVE(Tree, &root, var);
free(var->fullName);
free(var->firstName);
// Free duplicates list
struct DuplicateName *duplicate, *nextDuplicate;
STAILQ_FOREACH_SAFE(duplicate, &var->duplicatesHead, entries,
nextDuplicate) {
STAILQ_REMOVE(&var->duplicatesHead, duplicate, DuplicateName, entries);
free(duplicate->fullName);
free(duplicate);
}
free(var);
}
}
int main(int argc, char *argv[]) {
/* No command-line arguments provided */
if (argc < 2) {
char fileName[256];
printf("Please enter the name of the text file: ");
scanf("%255s", fileName);
parseFile(fileName);
} else {
parseFile(argv[1]);
}
printStudents();
freeStudents();
return 0;
}