Skip to content

Commit

Permalink
Merge pull request #2393 from yebblies/utf8
Browse files (browse the repository at this point in the history)
[DDMD] Use utf8_t* to represent Unicode strings in dmd
  • Loading branch information
WalterBright committed Jul 28, 2013
2 parents e230276 + ba82b04 commit 3599dc6
Show file tree
Hide file tree
Showing 26 changed files with 268 additions and 266 deletions.
10 changes: 5 additions & 5 deletions src/attrib.c
Expand Up @@ -204,7 +204,7 @@ void AttribDeclaration::inlineScan()
}
}

void AttribDeclaration::addComment(unsigned char *comment)
void AttribDeclaration::addComment(utf8_t *comment)
{
//printf("AttribDeclaration::addComment %s\n", comment);
if (comment)
Expand Down Expand Up @@ -1127,7 +1127,7 @@ void PragmaDeclaration::semantic(Scope *sc)
*/
for (size_t i = 0; i < se->len; )
{
unsigned char *p = (unsigned char *)se->string;
utf8_t *p = (utf8_t *)se->string;
dchar_t c = p[i];
if (c < 0x80)
{
Expand All @@ -1146,7 +1146,7 @@ void PragmaDeclaration::semantic(Scope *sc)
}
}

if (const char* msg = utf_decodeChar((unsigned char *)se->string, se->len, &i, &c))
if (const char* msg = utf_decodeChar((utf8_t *)se->string, se->len, &i, &c))
{
error("%s", msg);
break;
Expand Down Expand Up @@ -1354,7 +1354,7 @@ void ConditionalDeclaration::importAll(Scope *sc)
}
}

void ConditionalDeclaration::addComment(unsigned char *comment)
void ConditionalDeclaration::addComment(utf8_t *comment)
{
/* Because addComment is called by the parser, if we called
* include() it would define a version before it was used.
Expand Down Expand Up @@ -1592,7 +1592,7 @@ void CompileDeclaration::compileIt(Scope *sc)
else
{
se = se->toUTF8(sc);
Parser p(sc->module, (unsigned char *)se->string, se->len, 0);
Parser p(sc->module, (utf8_t *)se->string, se->len, 0);
p.loc = loc;
p.nextToken();
decl = p.parseDeclDefs(0);
Expand Down
4 changes: 2 additions & 2 deletions src/attrib.h
Expand Up @@ -46,7 +46,7 @@ class AttribDeclaration : public Dsymbol
void semantic2(Scope *sc);
void semantic3(Scope *sc);
void inlineScan();
void addComment(unsigned char *comment);
void addComment(utf8_t *comment);
void emitComment(Scope *sc);
const char *kind();
bool oneMember(Dsymbol **ps, Identifier *ident);
Expand Down Expand Up @@ -171,7 +171,7 @@ class ConditionalDeclaration : public AttribDeclaration
bool oneMember(Dsymbol **ps, Identifier *ident);
void emitComment(Scope *sc);
Dsymbols *include(Scope *sc, ScopeDsymbol *s);
void addComment(unsigned char *comment);
void addComment(utf8_t *comment);
void toCBuffer(OutBuffer *buf, HdrGenState *hgs);
void toJson(JsonOut *json);
void importAll(Scope *sc);
Expand Down
6 changes: 3 additions & 3 deletions src/cast.c
Expand Up @@ -1362,7 +1362,7 @@ Expression *StringExp::castTo(Scope *sc, Type *t)
case X(Tchar, Twchar):
for (size_t u = 0; u < len;)
{ unsigned c;
const char *p = utf_decodeChar((unsigned char *)se->string, len, &u, &c);
const char *p = utf_decodeChar((utf8_t *)se->string, len, &u, &c);
if (p)
error("%s", p);
else
Expand All @@ -1375,7 +1375,7 @@ Expression *StringExp::castTo(Scope *sc, Type *t)
case X(Tchar, Tdchar):
for (size_t u = 0; u < len;)
{ unsigned c;
const char *p = utf_decodeChar((unsigned char *)se->string, len, &u, &c);
const char *p = utf_decodeChar((utf8_t *)se->string, len, &u, &c);
if (p)
error("%s", p);
buffer.write4(c);
Expand Down Expand Up @@ -1474,7 +1474,7 @@ Expression *StringExp::castTo(Scope *sc, Type *t)
// Copy when changing the string literal
size_t newsz = se->sz;
size_t d = (dim2 < se->len) ? dim2 : se->len;
void *s = (unsigned char *)mem.malloc((dim2 + 1) * newsz);
void *s = (void *)mem.malloc((dim2 + 1) * newsz);
memcpy(s, se->string, d * newsz);
// Extend with 0, add terminating 0
memset((char *)s + d * newsz, 0, (dim2 + 1 - d) * newsz);
Expand Down
36 changes: 18 additions & 18 deletions src/constfold.c
Expand Up @@ -1446,8 +1446,8 @@ Expression *Slice(Type *type, Expression *e1, Expression *lwr, Expression *upr)
StringExp *es;

s = mem.malloc((len + 1) * sz);
memcpy((unsigned char *)s, (unsigned char *)es1->string + ilwr * sz, len * sz);
memset((unsigned char *)s + len * sz, 0, sz);
memcpy((utf8_t *)s, (utf8_t *)es1->string + ilwr * sz, len * sz);
memset((utf8_t *)s + len * sz, 0, sz);

es = new StringExp(loc, s, len, es1->postfix);
es->sz = sz;
Expand Down Expand Up @@ -1489,7 +1489,7 @@ void sliceAssignArrayLiteralFromString(ArrayLiteralExp *existingAE, StringExp *n
{
size_t newlen = newval->len;
size_t sz = newval->sz;
unsigned char *s = (unsigned char *)newval->string;
utf8_t *s = (utf8_t *)newval->string;
Type *elemType = existingAE->type->nextOf();
for (size_t j = 0; j < newlen; j++)
{
Expand All @@ -1513,7 +1513,7 @@ void sliceAssignArrayLiteralFromString(ArrayLiteralExp *existingAE, StringExp *n
*/
void sliceAssignStringFromArrayLiteral(StringExp *existingSE, ArrayLiteralExp *newae, size_t firstIndex)
{
unsigned char *s = (unsigned char *)existingSE->string;
utf8_t *s = (utf8_t *)existingSE->string;
for (size_t j = 0; j < newae->elements->dim; j++)
{
unsigned value = (unsigned)((*newae->elements)[j]->toInteger());
Expand All @@ -1534,7 +1534,7 @@ void sliceAssignStringFromArrayLiteral(StringExp *existingSE, ArrayLiteralExp *n
*/
void sliceAssignStringFromString(StringExp *existingSE, StringExp *newstr, size_t firstIndex)
{
unsigned char *s = (unsigned char *)existingSE->string;
utf8_t *s = (utf8_t *)existingSE->string;
size_t sz = existingSE->sz;
assert(sz == newstr->sz);
memcpy(s + firstIndex * sz, newstr->string, sz * newstr->len);
Expand All @@ -1545,8 +1545,8 @@ void sliceAssignStringFromString(StringExp *existingSE, StringExp *newstr, size_
*/
int sliceCmpStringWithString(StringExp *se1, StringExp *se2, size_t lo1, size_t lo2, size_t len)
{
unsigned char *s1 = (unsigned char *)se1->string;
unsigned char *s2 = (unsigned char *)se2->string;
utf8_t *s1 = (utf8_t *)se1->string;
utf8_t *s2 = (utf8_t *)se2->string;
size_t sz = se1->sz;
assert(sz == se2->sz);

Expand All @@ -1558,7 +1558,7 @@ int sliceCmpStringWithString(StringExp *se1, StringExp *se2, size_t lo1, size_t
*/
int sliceCmpStringWithArray(StringExp *se1, ArrayLiteralExp *ae2, size_t lo1, size_t lo2, size_t len)
{
unsigned char *s = (unsigned char *)se1->string;
utf8_t *s = (utf8_t *)se1->string;
size_t sz = se1->sz;

for (size_t j = 0; j < len; j++)
Expand Down Expand Up @@ -1616,12 +1616,12 @@ Expression *Cat(Type *type, Expression *e1, Expression *e2)
size_t len = (t->ty == tn->ty) ? 1 : utf_codeLength(sz, v);
s = mem.malloc((len + 1) * sz);
if (t->ty == tn->ty)
memcpy((unsigned char *)s, &v, sz);
memcpy((utf8_t *)s, &v, sz);
else
utf_encode(sz, s, v);

// Add terminating 0
memset((unsigned char *)s + len * sz, 0, sz);
memset((utf8_t *)s + len * sz, 0, sz);

es = new StringExp(loc, s, len);
es->sz = sz;
Expand Down Expand Up @@ -1675,10 +1675,10 @@ Expression *Cat(Type *type, Expression *e1, Expression *e2)
}
s = mem.malloc((len + 1) * sz);
memcpy(s, es1->string, es1->len * sz);
memcpy((unsigned char *)s + es1->len * sz, es2->string, es2->len * sz);
memcpy((utf8_t *)s + es1->len * sz, es2->string, es2->len * sz);

// Add terminating 0
memset((unsigned char *)s + len * sz, 0, sz);
memset((utf8_t *)s + len * sz, 0, sz);

es = new StringExp(loc, s, len);
es->sz = sz;
Expand Down Expand Up @@ -1740,12 +1740,12 @@ Expression *Cat(Type *type, Expression *e1, Expression *e2)
s = mem.malloc((len + 1) * sz);
memcpy(s, es1->string, es1->len * sz);
if (homoConcat)
memcpy((unsigned char *)s + (sz * es1->len), &v, sz);
memcpy((utf8_t *)s + (sz * es1->len), &v, sz);
else
utf_encode(sz, (unsigned char *)s + (sz * es1->len), v);
utf_encode(sz, (utf8_t *)s + (sz * es1->len), v);

// Add terminating 0
memset((unsigned char *)s + len * sz, 0, sz);
memset((utf8_t *)s + len * sz, 0, sz);

es = new StringExp(loc, s, len);
es->sz = sz;
Expand All @@ -1764,11 +1764,11 @@ Expression *Cat(Type *type, Expression *e1, Expression *e2)
dinteger_t v = e1->toInteger();

s = mem.malloc((len + 1) * sz);
memcpy((unsigned char *)s, &v, sz);
memcpy((unsigned char *)s + sz, es2->string, es2->len * sz);
memcpy((utf8_t *)s, &v, sz);
memcpy((utf8_t *)s + sz, es2->string, es2->len * sz);

// Add terminating 0
memset((unsigned char *)s + len * sz, 0, sz);
memset((utf8_t *)s + len * sz, 0, sz);

es = new StringExp(loc, s, len);
es->sz = sz;
Expand Down
16 changes: 7 additions & 9 deletions src/ctfeexpr.c
Expand Up @@ -238,8 +238,7 @@ Expression *copyLiteral(Expression *e)
if (e->op == TOKstring) // syntaxCopy doesn't make a copy for StringExp!
{
StringExp *se = (StringExp *)e;
unsigned char *s;
s = (unsigned char *)mem.calloc(se->len + 1, se->sz);
utf8_t *s = (utf8_t *)mem.calloc(se->len + 1, se->sz);
memcpy(s, se->string, se->len * se->sz);
StringExp *se2 = new StringExp(se->loc, s, se->len);
se2->committed = se->committed;
Expand Down Expand Up @@ -483,8 +482,7 @@ ArrayLiteralExp *createBlockDuplicatedArrayLiteral(Loc loc, Type *type,
StringExp *createBlockDuplicatedStringLiteral(Loc loc, Type *type,
unsigned value, size_t dim, int sz)
{
unsigned char *s;
s = (unsigned char *)mem.calloc(dim + 1, sz);
utf8_t *s = (utf8_t *)mem.calloc(dim + 1, sz);
for (size_t elemi = 0; elemi < dim; ++elemi)
{
switch (sz)
Expand Down Expand Up @@ -1596,11 +1594,11 @@ Expression *ctfeCat(Type *type, Expression *e1, Expression *e2)
if (es2e->op != TOKint64)
return EXP_CANT_INTERPRET;
dinteger_t v = es2e->toInteger();
memcpy((unsigned char *)s + i * sz, &v, sz);
memcpy((utf8_t *)s + i * sz, &v, sz);
}

// Add terminating 0
memset((unsigned char *)s + len * sz, 0, sz);
memset((utf8_t *)s + len * sz, 0, sz);

StringExp *es = new StringExp(loc, s, len);
es->sz = sz;
Expand All @@ -1627,11 +1625,11 @@ Expression *ctfeCat(Type *type, Expression *e1, Expression *e2)
if (es2e->op != TOKint64)
return EXP_CANT_INTERPRET;
dinteger_t v = es2e->toInteger();
memcpy((unsigned char *)s + (es1->len + i) * sz, &v, sz);
memcpy((utf8_t *)s + (es1->len + i) * sz, &v, sz);
}

// Add terminating 0
memset((unsigned char *)s + len * sz, 0, sz);
memset((utf8_t *)s + len * sz, 0, sz);

StringExp *es = new StringExp(loc, s, len);
es->sz = sz;
Expand Down Expand Up @@ -1923,7 +1921,7 @@ Expression *changeArrayLiteralLength(Loc loc, TypeArray *arrayType,
if (oldval->op == TOKstring)
{
StringExp *oldse = (StringExp *)oldval;
unsigned char *s = (unsigned char *)mem.calloc(newlen + 1, oldse->sz);
utf8_t *s = (utf8_t *)mem.calloc(newlen + 1, oldse->sz);
memcpy(s, oldse->string, copylen * oldse->sz);
unsigned defaultValue = (unsigned)(defaultElem->toInteger());
for (size_t elemi = copylen; elemi < newlen; ++elemi)
Expand Down

0 comments on commit 3599dc6

Please sign in to comment.