-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[TableGen] Minor cleanup in StringToOffsetTable
#147712
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Make `AppendZero` a class member instead of an argument to `GetOrAddStringOffset` to reflect the intended usage that for a given `StringToOffsetTable`, all strings must use the same value of `AppendZero`. Modify `EmitStringTableDef` to drop the `Indent` argument as its always set to `""`, and to fail if it's called for a table with non-null-terminated strings.
47e18f2
to
65e8ae3
Compare
@llvm/pr-subscribers-llvm-adt Author: Rahul Joshi (jurahul) ChangesMake Modify Full diff: https://github.com/llvm/llvm-project/pull/147712.diff 8 Files Affected:
diff --git a/llvm/include/llvm/ADT/StringMapEntry.h b/llvm/include/llvm/ADT/StringMapEntry.h
index 842cf704de479..21be5ec343059 100644
--- a/llvm/include/llvm/ADT/StringMapEntry.h
+++ b/llvm/include/llvm/ADT/StringMapEntry.h
@@ -110,7 +110,7 @@ class StringMapEntry final : public StringMapEntryStorage<ValueTy> {
}
/// getKeyData - Return the start of the string data that is the key for this
- /// value. The string data is always stored immediately after the
+ /// value. The string data is always stored immediately after the
/// StringMapEntry object.
const char *getKeyData() const {
return reinterpret_cast<const char *>(this + 1);
diff --git a/llvm/include/llvm/TableGen/StringToOffsetTable.h b/llvm/include/llvm/TableGen/StringToOffsetTable.h
index 21795644d4bd6..1550515ce3d7b 100644
--- a/llvm/include/llvm/TableGen/StringToOffsetTable.h
+++ b/llvm/include/llvm/TableGen/StringToOffsetTable.h
@@ -23,9 +23,10 @@ namespace llvm {
class StringToOffsetTable {
StringMap<unsigned> StringOffset;
std::string AggregateString;
+ const bool AppendZero;
public:
- StringToOffsetTable() {
+ StringToOffsetTable(bool AppendZero = true) : AppendZero(AppendZero) {
// Ensure we always put the empty string at offset zero. That lets empty
// initialization also be zero initialization for offsets into the table.
GetOrAddStringOffset("");
@@ -34,7 +35,7 @@ class StringToOffsetTable {
bool empty() const { return StringOffset.empty(); }
size_t size() const { return AggregateString.size(); }
- unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true);
+ unsigned GetOrAddStringOffset(StringRef Str);
// Returns the offset of `Str` in the table if its preset, else return
// std::nullopt.
@@ -45,7 +46,7 @@ class StringToOffsetTable {
return II->second;
}
- // Emit a string table definition with the provided name and indent.
+ // Emit a string table definition with the provided name.
//
// When possible, this uses string-literal concatenation to emit the string
// contents in a readable and searchable way. However, for (very) large string
@@ -56,8 +57,7 @@ class StringToOffsetTable {
// The string table, and its input string contents, are always emitted as both
// `static` and `constexpr`. Both `Name` and (`Name` + "Storage") must be
// valid identifiers to declare.
- void EmitStringTableDef(raw_ostream &OS, const Twine &Name,
- const Twine &Indent = "") const;
+ void EmitStringTableDef(raw_ostream &OS, const Twine &Name) const;
// Emit the string as one single string.
void EmitString(raw_ostream &O) const;
diff --git a/llvm/lib/TableGen/StringToOffsetTable.cpp b/llvm/lib/TableGen/StringToOffsetTable.cpp
index 0db3f35f1c4b3..17e1e660c15ee 100644
--- a/llvm/lib/TableGen/StringToOffsetTable.cpp
+++ b/llvm/lib/TableGen/StringToOffsetTable.cpp
@@ -9,32 +9,37 @@
#include "llvm/TableGen/StringToOffsetTable.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Main.h"
using namespace llvm;
-unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str,
- bool appendZero) {
+unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str) {
auto [II, Inserted] = StringOffset.insert({Str, size()});
if (Inserted) {
// Add the string to the aggregate if this is the first time found.
AggregateString.append(Str.begin(), Str.end());
- if (appendZero)
+ if (AppendZero)
AggregateString += '\0';
}
return II->second;
}
-void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name,
- const Twine &Indent) const {
+void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS,
+ const Twine &Name) const {
+ // This generates a `llvm::StringTable` which expects that entries are null
+ // terminated. So fail with an error if `AppendZero` is false.
+ if (!AppendZero)
+ PrintFatalError("llvm::StringTable requires null terminated strings");
+
OS << formatv(R"(
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Woverlength-strings"
#endif
-{0}static constexpr char {1}Storage[] = )",
- Indent, Name);
+static constexpr char {}Storage[] = )",
+ Name);
// MSVC silently miscompiles string literals longer than 64k in some
// circumstances. The build system sets EmitLongStrLiterals to false when it
@@ -54,7 +59,7 @@ void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name,
"Expected empty string at the end due to terminators!");
Strings.pop_back();
for (StringRef Str : Strings) {
- OS << LineSep << Indent << " ";
+ OS << LineSep << " ";
// If we can, just emit this as a string literal to be concatenated.
if (!UseChars) {
OS << "\"";
@@ -71,17 +76,17 @@ void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name,
}
OS << CharSep << "'\\0'";
}
- OS << LineSep << Indent << (UseChars ? "};" : " ;");
+ OS << LineSep << (UseChars ? "};" : " ;");
OS << formatv(R"(
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
-{0}static constexpr llvm::StringTable {1} =
-{0} {1}Storage;
+static constexpr llvm::StringTable
+{0} = {0}Storage;
)",
- Indent, Name);
+ Name);
}
void StringToOffsetTable::EmitString(raw_ostream &O) const {
diff --git a/llvm/test/TableGen/MixedCasedMnemonic.td b/llvm/test/TableGen/MixedCasedMnemonic.td
index 14ab104c7e120..cb224ac59c6de 100644
--- a/llvm/test/TableGen/MixedCasedMnemonic.td
+++ b/llvm/test/TableGen/MixedCasedMnemonic.td
@@ -41,7 +41,7 @@ def :MnemonicAlias<"InstB", "BInst">;
// Check that the matcher lower()s the mnemonics it matches.
// MATCHER: static const char MnemonicTable[] =
-// MATCHER-NEXT: "\000\005ainst\005binst";
+// MATCHER-NEXT: "\005ainst\005binst";
// Check that aInst appears before BInst in the match table.
// This shows that the mnemonics are sorted in a case-insensitive way,
diff --git a/llvm/test/TableGen/SDNodeInfoEmitter/basic.td b/llvm/test/TableGen/SDNodeInfoEmitter/basic.td
index b8bff520bbcaa..2b4c76a0d4543 100644
--- a/llvm/test/TableGen/SDNodeInfoEmitter/basic.td
+++ b/llvm/test/TableGen/SDNodeInfoEmitter/basic.td
@@ -35,8 +35,8 @@ def MyTarget : Target;
// CHECK-NEXT: #pragma GCC diagnostic pop
// CHECK-NEXT: #endif
// CHECK-EMPTY:
-// CHECK-NEXT: static constexpr llvm::StringTable MyTargetSDNodeNames =
-// CHECK-NEXT: MyTargetSDNodeNamesStorage;
+// CHECK-NEXT: static constexpr llvm::StringTable
+// CHECK-NEXT: MyTargetSDNodeNames = MyTargetSDNodeNamesStorage;
// CHECK-EMPTY:
// CHECK-NEXT: static const SDTypeConstraint MyTargetSDTypeConstraints[] = {
// CHECK-NEXT: /* dummy */ {SDTCisVT, 0, 0, MVT::INVALID_SIMPLE_VALUE_TYPE}
diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
index 6dec4110decbe..bfd158614ae39 100644
--- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
@@ -3440,7 +3440,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
if (!ReportMultipleNearMisses)
emitAsmTiedOperandConstraints(Target, Info, OS, HasOptionalOperands);
- StringToOffsetTable StringTable;
+ StringToOffsetTable StringTable(/*AppendZero=*/false);
size_t MaxNumOperands = 0;
unsigned MaxMnemonicIndex = 0;
@@ -3451,8 +3451,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
// Store a pascal-style length byte in the mnemonic.
std::string LenMnemonic = char(MI->Mnemonic.size()) + MI->Mnemonic.lower();
- MaxMnemonicIndex = std::max(
- MaxMnemonicIndex, StringTable.GetOrAddStringOffset(LenMnemonic, false));
+ MaxMnemonicIndex = std::max(MaxMnemonicIndex,
+ StringTable.GetOrAddStringOffset(LenMnemonic));
}
OS << "static const char MnemonicTable[] =\n";
diff --git a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
index 2876a0e2ad546..ac5c455ed63ce 100644
--- a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
@@ -252,7 +252,7 @@ void IntrinsicEmitter::EmitIntrinsicToNameTable(
)";
- Table.EmitStringTableDef(OS, "IntrinsicNameTable", /*Indent=*/"");
+ Table.EmitStringTableDef(OS, "IntrinsicNameTable");
OS << R"(
static constexpr unsigned IntrinsicNameOffsetTable[] = {
diff --git a/llvm/utils/TableGen/OptionParserEmitter.cpp b/llvm/utils/TableGen/OptionParserEmitter.cpp
index f4964282934eb..a470fbbcadd58 100644
--- a/llvm/utils/TableGen/OptionParserEmitter.cpp
+++ b/llvm/utils/TableGen/OptionParserEmitter.cpp
@@ -291,7 +291,7 @@ static void emitOptionParser(const RecordKeeper &Records, raw_ostream &OS) {
OS << "/////////\n";
OS << "// String table\n\n";
OS << "#ifdef OPTTABLE_STR_TABLE_CODE\n";
- Table.EmitStringTableDef(OS, "OptionStrTable", /*Indent=*/"");
+ Table.EmitStringTableDef(OS, "OptionStrTable");
OS << "#endif // OPTTABLE_STR_TABLE_CODE\n\n";
// Dump prefixes.
|
@llvm/pr-subscribers-tablegen Author: Rahul Joshi (jurahul) ChangesMake Modify Full diff: https://github.com/llvm/llvm-project/pull/147712.diff 8 Files Affected:
diff --git a/llvm/include/llvm/ADT/StringMapEntry.h b/llvm/include/llvm/ADT/StringMapEntry.h
index 842cf704de479..21be5ec343059 100644
--- a/llvm/include/llvm/ADT/StringMapEntry.h
+++ b/llvm/include/llvm/ADT/StringMapEntry.h
@@ -110,7 +110,7 @@ class StringMapEntry final : public StringMapEntryStorage<ValueTy> {
}
/// getKeyData - Return the start of the string data that is the key for this
- /// value. The string data is always stored immediately after the
+ /// value. The string data is always stored immediately after the
/// StringMapEntry object.
const char *getKeyData() const {
return reinterpret_cast<const char *>(this + 1);
diff --git a/llvm/include/llvm/TableGen/StringToOffsetTable.h b/llvm/include/llvm/TableGen/StringToOffsetTable.h
index 21795644d4bd6..1550515ce3d7b 100644
--- a/llvm/include/llvm/TableGen/StringToOffsetTable.h
+++ b/llvm/include/llvm/TableGen/StringToOffsetTable.h
@@ -23,9 +23,10 @@ namespace llvm {
class StringToOffsetTable {
StringMap<unsigned> StringOffset;
std::string AggregateString;
+ const bool AppendZero;
public:
- StringToOffsetTable() {
+ StringToOffsetTable(bool AppendZero = true) : AppendZero(AppendZero) {
// Ensure we always put the empty string at offset zero. That lets empty
// initialization also be zero initialization for offsets into the table.
GetOrAddStringOffset("");
@@ -34,7 +35,7 @@ class StringToOffsetTable {
bool empty() const { return StringOffset.empty(); }
size_t size() const { return AggregateString.size(); }
- unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true);
+ unsigned GetOrAddStringOffset(StringRef Str);
// Returns the offset of `Str` in the table if its preset, else return
// std::nullopt.
@@ -45,7 +46,7 @@ class StringToOffsetTable {
return II->second;
}
- // Emit a string table definition with the provided name and indent.
+ // Emit a string table definition with the provided name.
//
// When possible, this uses string-literal concatenation to emit the string
// contents in a readable and searchable way. However, for (very) large string
@@ -56,8 +57,7 @@ class StringToOffsetTable {
// The string table, and its input string contents, are always emitted as both
// `static` and `constexpr`. Both `Name` and (`Name` + "Storage") must be
// valid identifiers to declare.
- void EmitStringTableDef(raw_ostream &OS, const Twine &Name,
- const Twine &Indent = "") const;
+ void EmitStringTableDef(raw_ostream &OS, const Twine &Name) const;
// Emit the string as one single string.
void EmitString(raw_ostream &O) const;
diff --git a/llvm/lib/TableGen/StringToOffsetTable.cpp b/llvm/lib/TableGen/StringToOffsetTable.cpp
index 0db3f35f1c4b3..17e1e660c15ee 100644
--- a/llvm/lib/TableGen/StringToOffsetTable.cpp
+++ b/llvm/lib/TableGen/StringToOffsetTable.cpp
@@ -9,32 +9,37 @@
#include "llvm/TableGen/StringToOffsetTable.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Main.h"
using namespace llvm;
-unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str,
- bool appendZero) {
+unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str) {
auto [II, Inserted] = StringOffset.insert({Str, size()});
if (Inserted) {
// Add the string to the aggregate if this is the first time found.
AggregateString.append(Str.begin(), Str.end());
- if (appendZero)
+ if (AppendZero)
AggregateString += '\0';
}
return II->second;
}
-void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name,
- const Twine &Indent) const {
+void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS,
+ const Twine &Name) const {
+ // This generates a `llvm::StringTable` which expects that entries are null
+ // terminated. So fail with an error if `AppendZero` is false.
+ if (!AppendZero)
+ PrintFatalError("llvm::StringTable requires null terminated strings");
+
OS << formatv(R"(
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Woverlength-strings"
#endif
-{0}static constexpr char {1}Storage[] = )",
- Indent, Name);
+static constexpr char {}Storage[] = )",
+ Name);
// MSVC silently miscompiles string literals longer than 64k in some
// circumstances. The build system sets EmitLongStrLiterals to false when it
@@ -54,7 +59,7 @@ void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name,
"Expected empty string at the end due to terminators!");
Strings.pop_back();
for (StringRef Str : Strings) {
- OS << LineSep << Indent << " ";
+ OS << LineSep << " ";
// If we can, just emit this as a string literal to be concatenated.
if (!UseChars) {
OS << "\"";
@@ -71,17 +76,17 @@ void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name,
}
OS << CharSep << "'\\0'";
}
- OS << LineSep << Indent << (UseChars ? "};" : " ;");
+ OS << LineSep << (UseChars ? "};" : " ;");
OS << formatv(R"(
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
-{0}static constexpr llvm::StringTable {1} =
-{0} {1}Storage;
+static constexpr llvm::StringTable
+{0} = {0}Storage;
)",
- Indent, Name);
+ Name);
}
void StringToOffsetTable::EmitString(raw_ostream &O) const {
diff --git a/llvm/test/TableGen/MixedCasedMnemonic.td b/llvm/test/TableGen/MixedCasedMnemonic.td
index 14ab104c7e120..cb224ac59c6de 100644
--- a/llvm/test/TableGen/MixedCasedMnemonic.td
+++ b/llvm/test/TableGen/MixedCasedMnemonic.td
@@ -41,7 +41,7 @@ def :MnemonicAlias<"InstB", "BInst">;
// Check that the matcher lower()s the mnemonics it matches.
// MATCHER: static const char MnemonicTable[] =
-// MATCHER-NEXT: "\000\005ainst\005binst";
+// MATCHER-NEXT: "\005ainst\005binst";
// Check that aInst appears before BInst in the match table.
// This shows that the mnemonics are sorted in a case-insensitive way,
diff --git a/llvm/test/TableGen/SDNodeInfoEmitter/basic.td b/llvm/test/TableGen/SDNodeInfoEmitter/basic.td
index b8bff520bbcaa..2b4c76a0d4543 100644
--- a/llvm/test/TableGen/SDNodeInfoEmitter/basic.td
+++ b/llvm/test/TableGen/SDNodeInfoEmitter/basic.td
@@ -35,8 +35,8 @@ def MyTarget : Target;
// CHECK-NEXT: #pragma GCC diagnostic pop
// CHECK-NEXT: #endif
// CHECK-EMPTY:
-// CHECK-NEXT: static constexpr llvm::StringTable MyTargetSDNodeNames =
-// CHECK-NEXT: MyTargetSDNodeNamesStorage;
+// CHECK-NEXT: static constexpr llvm::StringTable
+// CHECK-NEXT: MyTargetSDNodeNames = MyTargetSDNodeNamesStorage;
// CHECK-EMPTY:
// CHECK-NEXT: static const SDTypeConstraint MyTargetSDTypeConstraints[] = {
// CHECK-NEXT: /* dummy */ {SDTCisVT, 0, 0, MVT::INVALID_SIMPLE_VALUE_TYPE}
diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
index 6dec4110decbe..bfd158614ae39 100644
--- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
@@ -3440,7 +3440,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
if (!ReportMultipleNearMisses)
emitAsmTiedOperandConstraints(Target, Info, OS, HasOptionalOperands);
- StringToOffsetTable StringTable;
+ StringToOffsetTable StringTable(/*AppendZero=*/false);
size_t MaxNumOperands = 0;
unsigned MaxMnemonicIndex = 0;
@@ -3451,8 +3451,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
// Store a pascal-style length byte in the mnemonic.
std::string LenMnemonic = char(MI->Mnemonic.size()) + MI->Mnemonic.lower();
- MaxMnemonicIndex = std::max(
- MaxMnemonicIndex, StringTable.GetOrAddStringOffset(LenMnemonic, false));
+ MaxMnemonicIndex = std::max(MaxMnemonicIndex,
+ StringTable.GetOrAddStringOffset(LenMnemonic));
}
OS << "static const char MnemonicTable[] =\n";
diff --git a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
index 2876a0e2ad546..ac5c455ed63ce 100644
--- a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
@@ -252,7 +252,7 @@ void IntrinsicEmitter::EmitIntrinsicToNameTable(
)";
- Table.EmitStringTableDef(OS, "IntrinsicNameTable", /*Indent=*/"");
+ Table.EmitStringTableDef(OS, "IntrinsicNameTable");
OS << R"(
static constexpr unsigned IntrinsicNameOffsetTable[] = {
diff --git a/llvm/utils/TableGen/OptionParserEmitter.cpp b/llvm/utils/TableGen/OptionParserEmitter.cpp
index f4964282934eb..a470fbbcadd58 100644
--- a/llvm/utils/TableGen/OptionParserEmitter.cpp
+++ b/llvm/utils/TableGen/OptionParserEmitter.cpp
@@ -291,7 +291,7 @@ static void emitOptionParser(const RecordKeeper &Records, raw_ostream &OS) {
OS << "/////////\n";
OS << "// String table\n\n";
OS << "#ifdef OPTTABLE_STR_TABLE_CODE\n";
- Table.EmitStringTableDef(OS, "OptionStrTable", /*Indent=*/"");
+ Table.EmitStringTableDef(OS, "OptionStrTable");
OS << "#endif // OPTTABLE_STR_TABLE_CODE\n\n";
// Dump prefixes.
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Make
AppendZero
a class member instead of an argument toGetOrAddStringOffset
to reflect the intended usage that for a givenStringToOffsetTable
, all strings must use the same value ofAppendZero
.Modify
EmitStringTableDef
to drop theIndent
argument as its always set to""
, and to fail if it's called for a table with non-null-terminated strings.