Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
nix repl: Provide documentation from comment when evaluating to lambda
- Loading branch information
Showing
6 changed files
with
609 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,227 @@ | ||
#include <iostream> | ||
#include <fstream> | ||
#include <sstream> | ||
#include <regex> | ||
#include <climits> | ||
#include <algorithm> | ||
|
||
#include "comment.hh" | ||
#include "util.hh" | ||
|
||
// This module looks for documentation comments in the source code. | ||
// | ||
// Documentation is not retained during parsing, and it should not be, | ||
// for performance reasons. Because of this the code has to jump | ||
// through some hoops, to perform its task. | ||
// | ||
// Adapting the parser was not considered an option, so this code | ||
// parses the comments from scratch, using regular expressions. These | ||
// do not support all syntactic constructs, so in rare cases, they | ||
// will fail and the code will report no documentation. | ||
// | ||
// One such situation is where documentation is requested for a | ||
// partially applied function, where the outer lambda pattern | ||
// matches an attribute set. This is not supported in the regexes | ||
// because it potentially requires (almost?) the entire grammar. | ||
// | ||
// This module has been designed not to report the wrong | ||
// documentation; considering that the wrong documentation is worse | ||
// than no documentation. The regular expressions will only match | ||
// simple, well understood syntactic structures, or not match at all. | ||
// | ||
// This approach to finding documentation does not cause extra runtime | ||
// overhead, until used. | ||
// | ||
// This module does not support tab ('\t') characters. In some places | ||
// they are treated as single spaces. They should be avoided. | ||
|
||
namespace nix::Comment { | ||
|
||
// Sentinel Doc with empty strings and timesApplied 0; lookupDoc and parseDoc
// return it when no documentation can be recovered.
struct Doc emptyDoc("", "", "", 0); | ||
|
||
// parseDoc will try to recover a Doc by looking at the text that leads up to a term | ||
// definition. | ||
static struct Doc parseDoc(std::string sourcePrefix); | ||
|
||
// stripComment unpacks a comment, by unindenting and stripping " * " prefixes as | ||
// applicable. The argument should include any preceding whitespace. | ||
static std::string stripComment(std::string rawComment); | ||
|
||
// Consistent unindenting. It will only remove entire columns. | ||
static std::string unindent(std::string s); | ||
|
||
static std::string trimUnindent(std::string s) { | ||
return trim(unindent(s)); | ||
} | ||
|
||
// Return `s` without its leading `prefix`. If `s` does not start with
// `prefix`, it is returned unchanged.
//
// Only position 0 is inspected: an anchored compare() answers the question
// without scanning the rest of the string for later occurrences.
static std::string stripPrefix(const std::string & prefix, std::string s) {
    if (s.compare(0, prefix.length(), prefix) == 0) {
        s.erase(0, prefix.length());
    }
    return s;
}
|
||
// Try to recover the documentation for the term defined at `pos`.
//
// Reads the file named by pos.file up to (but not including) the position
// given by pos.line / pos.column, normalising line ends to "\n", and hands
// that prefix to parseDoc.
//
// Returns emptyDoc when the file cannot be read, when it ends before
// pos.line is reached, or when a std::exception is thrown along the way (the
// exception message is printed to std::cout in that case).
struct Doc lookupDoc(Pos & pos) {

    try {
        std::ifstream ifs(static_cast<const std::string>(pos.file));
        std::stringstream buffer;

        // read up to line and column with unix newlines

        std::string line;
        for (size_t i = 0;
             std::getline(ifs, line) && i < pos.line-1;
             i++) {
            buffer << line << "\n";
        }

        // The loop ended because getline failed: the file could not be
        // opened or has fewer lines than pos.line. `line` then does not hold
        // the target line, so do not guess at documentation.
        if (!ifs) {
            return emptyDoc;
        }

        buffer << line.substr(0, pos.column-1);

        return parseDoc(buffer.str());
    } catch (const std::exception & e) {
        // Caught by const reference so that what() of the derived exception
        // (e.g. std::regex_error) is preserved instead of being sliced away.
        std::cout << "Caught exception: " << e.what() << std::endl;
        return emptyDoc;
    }
}
|
||
// See lambdas in parseDoc | ||
// Count the lambda heads in `piece`, the text captured by the `lambdas`
// group in parseDoc. That pattern only admits plain identifier parameters
// ("x: y: "), so each ':' character corresponds to exactly one lambda.
static int countLambdas(const std::string & piece) {
    return static_cast<int>(std::count(piece.begin(), piece.end(), ':'));
}
|
||
// Try to recover a Doc by looking at the text that leads up to a term | ||
// definition | ||
static struct Doc parseDoc(std::string sourcePrefix) { | ||
|
||
std::string wss("[ \t\r\n]*"); | ||
std::string spaces("[ \t]*"); | ||
|
||
std::string singleLineComment(spaces + "#[^\r\n]*(?:\n|\r\n)"); | ||
std::string multiSingleLineComment("(?:" + singleLineComment + ")*"); | ||
std::string multiLineComment("\\/\\*(?:[^*]|\\*+[^*/])*\\*+\\/"); | ||
std::string commentUnit("(" + multiSingleLineComment + "|" + spaces + multiLineComment + ")" + wss); | ||
|
||
std::string ident("[a-zA-Z_][a-zA-Z0-9_'-]*" + wss); | ||
std::string identKeep("([a-zA-Z_][a-zA-Z0-9_'-]*)" + wss); | ||
|
||
// lvalue for nested attrset construction, but not matching | ||
// quoted identifiers or ${...} or comments inbetween etc | ||
std::string simplePath("(?:" + wss + ident + "\\.)*" + identKeep); | ||
|
||
std::string lambda(ident + wss + ":" + wss); | ||
|
||
// see countLambdas | ||
std::string lambdas("((:?" + lambda + ")*)"); | ||
|
||
std::string assign("=" + wss); | ||
|
||
std::string re(commentUnit + simplePath + assign + lambdas + "$"); | ||
std::regex e(re); | ||
|
||
#define REGEX_GROUP_COMMENT 1 | ||
#define REGEX_GROUP_NAME 2 | ||
#define REGEX_GROUP_LAMBDAS 3 | ||
#define REGEX_GROUP_MAX 4 | ||
|
||
std::smatch matches; | ||
regex_search(sourcePrefix, matches, e); | ||
|
||
std::stringstream buffer; | ||
if (matches.length() < REGEX_GROUP_MAX) { | ||
return emptyDoc; | ||
} | ||
|
||
std::string rawComment = matches[REGEX_GROUP_COMMENT]; | ||
std::string name = matches[REGEX_GROUP_NAME]; | ||
int timesApplied = countLambdas(matches[REGEX_GROUP_LAMBDAS]); | ||
return Doc(rawComment, stripComment(rawComment), name, timesApplied); | ||
} | ||
|
||
static std::string stripComment(std::string rawComment) { | ||
rawComment.erase(rawComment.find_last_not_of("\n")+1); | ||
|
||
std::string s(trimUnindent(rawComment)); | ||
|
||
if (s[0] == '/' && s[1] == '*') { | ||
// Remove the "/*" | ||
// Indentation will be removed consistently later on | ||
s[0] = ' '; | ||
s[1] = ' '; | ||
|
||
// Remove the "*/" | ||
if (!s.empty() && *(--s.end()) == '/') | ||
s.pop_back(); | ||
if (!s.empty() && *(--s.end()) == '*') | ||
s.pop_back(); | ||
|
||
s = trimUnindent(s); | ||
|
||
std::istringstream inStream(s); | ||
std::ostringstream stripped; | ||
|
||
std::string line; | ||
|
||
/* at first, we assume a comment | ||
* that is formatted like this | ||
* with '*' characters at the beginning | ||
* of the line. | ||
*/ | ||
bool hasStars = true; | ||
|
||
while(std::getline(inStream,line,'\n')){ | ||
if (hasStars && ( | ||
(!line.empty() && line[0] == '*') | ||
|| (line.length() >= 2 && line[0] == ' ' && line[1] == '*') | ||
)) { | ||
if (line[0] == ' ') { | ||
line = stripPrefix(" *", line); | ||
} else { | ||
line = stripPrefix("*", line); | ||
} | ||
} else { | ||
hasStars = false; | ||
} | ||
|
||
stripped << line << std::endl; | ||
} | ||
return trimUnindent(stripped.str()); | ||
} | ||
else { | ||
std::istringstream inStream(s); | ||
std::ostringstream stripped; | ||
|
||
std::string line; | ||
while(std::getline(inStream, line, '\n')) { | ||
line.erase(0, line.find("#") + 1); | ||
stripped << line << std::endl; | ||
} | ||
return trimUnindent(stripped.str()); | ||
} | ||
|
||
} | ||
|
||
// Consistent unindenting. It will only remove entire columns.
//
// The number of columns removed is the smallest indentation found on any
// line that contains a non-whitespace character. Lines shorter than that
// (blank or whitespace-only) become empty. Every output line, including the
// last one, is terminated with '\n'.
static std::string unindent(std::string s) {
    // npos means "no line with content seen yet"; there is no upper bound
    // on the indentation that can be removed.
    size_t commonIndent = std::string::npos;
    {
        std::istringstream inStream(s);
        for (std::string line; std::getline(inStream, line); ) {
            const size_t firstNonWS = line.find_first_not_of(" \t\r\n");
            if (firstNonWS != std::string::npos) {
                commonIndent = std::min(firstNonWS, commonIndent);
            }
        }
    }

    std::ostringstream unindentedStream;
    {
        std::istringstream inStream(s);
        for (std::string line; std::getline(inStream, line); ) {
            // Never true while commonIndent is still npos, so input without
            // any content yields only empty lines.
            if (line.length() >= commonIndent) {
                unindentedStream << line.substr(commonIndent);
            }
            unindentedStream << '\n';
        }
    }
    return unindentedStream.str();
}
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#pragma once | ||
|
||
#include <string>
#include <utility>

#include "nixexpr.hh"
|
||
namespace nix::Comment { | ||
|
||
// Documentation recovered for a term from the comment preceding its
// definition.
struct Doc {

    // Name that the term is assigned to
    std::string name;

    // The comment as passed to the constructor: `rawComment` is the
    // unprocessed text, `comment` the processed text.
    std::string rawComment;
    std::string comment;

    // Number of times the curried function must be applied to get the value
    // that this structure documents.
    //
    // This is useful when showing the documentation for a partially applied
    // curried function. The documentation is for the unapplied function, so
    // this is crucial information.
    int timesApplied;

    // Note the argument order (rawComment, comment, name), which differs
    // from the member declaration order. The initializer list follows the
    // declaration order, and each by-value argument is moved into place
    // exactly once.
    Doc(std::string rawComment, std::string comment, std::string name, int timesApplied)
        : name(std::move(name))
        , rawComment(std::move(rawComment))
        , comment(std::move(comment))
        , timesApplied(timesApplied)
    { }

};
|
||
extern struct Doc emptyDoc; | ||
|
||
// lookupDoc will try to recover a Doc. This will perform I/O, | ||
// because documentation is not retained by the parser. | ||
// | ||
// Will return empty values if nothing can be found. | ||
// For its limitations, see the docs of the implementation. | ||
struct Doc lookupDoc(Pos & pos); | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.