Skip to content

Commit

Permalink
[FEATURE] support for comments
Browse files Browse the repository at this point in the history
  • Loading branch information
SeeSharpSoft committed Jul 18, 2020
1 parent e44e622 commit b847b35
Show file tree
Hide file tree
Showing 15 changed files with 215 additions and 22 deletions.
3 changes: 2 additions & 1 deletion src/main/java/net/seesharpsoft/intellij/plugins/csv/Csv.bnf
Expand Up @@ -19,12 +19,13 @@
COMMA='regexp:[,:;|\t]'
QUOTE='regexp:"'
CRLF='regexp:\n'
COMMENT='regexp:#.*(\n|$)'
]
}

csvFile ::= record (CRLF record)* [CRLF]

record ::= field (COMMA field)*
record ::= (COMMENT | (field (COMMA field)*))

field ::= (escaped | nonEscaped)

Expand Down
Expand Up @@ -10,16 +10,18 @@ public class CsvColumnInfoMap<T> {
private final Map<T, CsvColumnInfo<T>> myReverseInfoColumnMap;

private boolean hasErrors = false;
private boolean hasComments = false;

public CsvColumnInfoMap(Map<Integer, CsvColumnInfo<T>> infoColumnMap, boolean hasErrorsArg) {
public CsvColumnInfoMap(Map<Integer, CsvColumnInfo<T>> infoColumnMap, boolean hasErrorsArg, boolean hasCommentsArg) {
this.myInfoColumnMap = infoColumnMap;
this.myReverseInfoColumnMap = new HashMap<>();
buildReverseMap();
setHasErrors(hasErrorsArg);
setHasComments(hasCommentsArg);
}

public CsvColumnInfoMap(Map<Integer, CsvColumnInfo<T>> infoColumnMap) {
this(infoColumnMap, false);
this(infoColumnMap, false, false);
}

private void buildReverseMap() {
Expand Down Expand Up @@ -55,6 +57,14 @@ public void setHasErrors(boolean hasErrorsArg) {
hasErrors = hasErrorsArg;
}

/**
 * Reports the comment flag stored on this column info map.
 *
 * @return the value last passed to {@code setHasComments} (initially {@code false})
 */
public boolean hasComments() {
    return this.hasComments;
}

/**
 * Stores the comment flag for this column info map.
 *
 * @param hasCommentsArg the new flag value returned by {@code hasComments()}
 */
public void setHasComments(boolean hasCommentsArg) {
    this.hasComments = hasCommentsArg;
}

public boolean hasEmptyLastLine() {
CsvColumnInfo<T> columnInfo = myInfoColumnMap.get(0);
int size = columnInfo.getSize();
Expand Down
Expand Up @@ -201,6 +201,7 @@ public static CsvColumnInfoMap<PsiElement> createColumnInfoMap(CsvFile csvFile)
Map<Integer, CsvColumnInfo<PsiElement>> columnInfoMap = new HashMap<>();
CsvRecord[] records = PsiTreeUtil.getChildrenOfType(csvFile, CsvRecord.class);
int row = 0;
boolean hasComments = false;
for (CsvRecord record : records) {
int column = 0;
for (CsvField field : record.getFieldList()) {
Expand All @@ -213,9 +214,12 @@ public static CsvColumnInfoMap<PsiElement> createColumnInfoMap(CsvFile csvFile)
columnInfoMap.get(column).addElement(field, row, getFieldStartOffset(field), getFieldEndOffset(field));
++column;
}
if (record.getComment() != null) {
hasComments = true;
}
++row;
}
return new CsvColumnInfoMap(columnInfoMap, PsiTreeUtil.hasErrorElements(csvFile));
return new CsvColumnInfoMap(columnInfoMap, PsiTreeUtil.hasErrorElements(csvFile), hasComments);
}

public static String unquoteCsvValue(String content, CsvEscapeCharacter escapeCharacter) {
Expand Down
Expand Up @@ -4,6 +4,7 @@
import com.intellij.openapi.project.Project;
import com.intellij.openapi.vfs.VirtualFile;
import com.intellij.psi.PsiFile;
import net.seesharpsoft.intellij.plugins.csv.settings.CsvEditorSettings;
import org.jetbrains.annotations.NotNull;

public class CsvLexerFactory {
Expand All @@ -14,12 +15,13 @@ public static CsvLexerFactory getInstance() {
}

protected Lexer createLexer(@NotNull CsvValueSeparator separator, @NotNull CsvEscapeCharacter escapeCharacter) {
if (separator.isCustom()) {
if (separator.isCustom() || !CsvEditorSettings.getInstance().getCommentIndicator().isEmpty()) {
return new CsvSharpLexer(new CsvSharpLexer.Configuration(
separator.getCharacter(),
"\n",
escapeCharacter.getCharacter(),
"\""));
"\"",
CsvEditorSettings.getInstance().getCommentIndicator()));
}
return new CsvLexerAdapter(separator, escapeCharacter);
}
Expand Down
Expand Up @@ -5,6 +5,7 @@
import net.seesharpsoft.UnhandledSwitchCaseException;
import net.seesharpsoft.commons.util.Tokenizer;
import net.seesharpsoft.intellij.plugins.csv.psi.CsvTypes;
import net.seesharpsoft.intellij.plugins.csv.settings.CsvEditorSettings;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

Expand All @@ -18,6 +19,7 @@
public class CsvSharpLexer extends LexerBase {

private final Tokenizer<TokenType> tokenizer;
private final List<Tokenizer.Token<TokenType>> initialNextStateTokens;
private final List<Tokenizer.Token<TokenType>> unquotedNextStateTokens;
private final List<Tokenizer.Token<TokenType>> quotedNextStateTokens;

Expand All @@ -29,23 +31,34 @@ public class CsvSharpLexer extends LexerBase {
private IElementType currentTokenType;
private boolean failed;

private static final Map<TokenType, LexerState> INITIAL_NEXT_STATES = new HashMap<>();
private static final Map<TokenType, LexerState> UNQUOTED_NEXT_STATES = new HashMap<>();
private static final Map<TokenType, LexerState> QUOTED_NEXT_STATES = new HashMap<>();

// Transition tables: for each lexer state, maps every token type that may be
// matched in that state to the state the lexer enters afterwards.
static {
    // Initial state: the only state in which a COMMENT token is accepted.
    INITIAL_NEXT_STATES.put(TokenType.WHITESPACE, LexerState.Initial);
    INITIAL_NEXT_STATES.put(TokenType.TEXT, LexerState.Unquoted);
    INITIAL_NEXT_STATES.put(TokenType.VALUE_SEPARATOR, LexerState.Unquoted);
    INITIAL_NEXT_STATES.put(TokenType.BEGIN_QUOTE, LexerState.Quoted);
    INITIAL_NEXT_STATES.put(TokenType.RECORD_SEPARATOR, LexerState.Initial);
    INITIAL_NEXT_STATES.put(TokenType.COMMENT, LexerState.Initial);

    // Unquoted state: a comment character is accepted as its own token type here,
    // but no COMMENT token is.
    UNQUOTED_NEXT_STATES.put(TokenType.WHITESPACE, LexerState.Unquoted);
    UNQUOTED_NEXT_STATES.put(TokenType.TEXT, LexerState.Unquoted);
    UNQUOTED_NEXT_STATES.put(TokenType.COMMENT_CHARACTER, LexerState.Unquoted);
    UNQUOTED_NEXT_STATES.put(TokenType.VALUE_SEPARATOR, LexerState.Unquoted);
    UNQUOTED_NEXT_STATES.put(TokenType.BEGIN_QUOTE, LexerState.Quoted);
    // A record separator leads back to Initial. A former mapping of this key to
    // Unquoted was always overwritten by this entry and has been dropped.
    UNQUOTED_NEXT_STATES.put(TokenType.RECORD_SEPARATOR, LexerState.Initial);

    // Quoted state: only the closing quote leaves it.
    QUOTED_NEXT_STATES.put(TokenType.WHITESPACE, LexerState.Quoted);
    QUOTED_NEXT_STATES.put(TokenType.TEXT, LexerState.Quoted);
    QUOTED_NEXT_STATES.put(TokenType.COMMENT_CHARACTER, LexerState.Quoted);
    QUOTED_NEXT_STATES.put(TokenType.ESCAPED_CHARACTER, LexerState.Quoted);
    QUOTED_NEXT_STATES.put(TokenType.END_QUOTE, LexerState.Unquoted);
}

enum LexerState {
Initial(INITIAL_NEXT_STATES),
Unquoted(UNQUOTED_NEXT_STATES),
Quoted(QUOTED_NEXT_STATES);

Expand All @@ -71,22 +84,26 @@ enum TokenType {
ESCAPED_CHARACTER,
VALUE_SEPARATOR,
RECORD_SEPARATOR,
WHITESPACE
WHITESPACE,
COMMENT,
COMMENT_CHARACTER
}

public static class Configuration {
public static final Configuration DEFAULT = new Configuration(",", "\n", "\"", "\"");
public static final Configuration DEFAULT = new Configuration(",", "\n", "\"", "\"", "#");

public String valueSeparator;
public String recordSeparator;
public String escapeCharacter;
public String quoteCharacter;
public String commentCharacter;

public Configuration(String valueSeparator, String recordSeparator, String escapeCharacter, String quoteCharacter) {
public Configuration(String valueSeparator, String recordSeparator, String escapeCharacter, String quoteCharacter, String commentCharacter) {
this.valueSeparator = Pattern.quote(valueSeparator);
this.recordSeparator = Pattern.quote(recordSeparator);
this.escapeCharacter = Pattern.quote(escapeCharacter);
this.quoteCharacter = Pattern.quote(quoteCharacter);
this.commentCharacter = Pattern.quote(commentCharacter);
}
}

Expand All @@ -102,17 +119,32 @@ public CsvSharpLexer(Configuration configuration) {
tokenizer.add(TokenType.BEGIN_QUOTE, String.format("%s", configuration.quoteCharacter));
tokenizer.add(TokenType.VALUE_SEPARATOR, configuration.valueSeparator);
tokenizer.add(TokenType.RECORD_SEPARATOR, configuration.recordSeparator);
if (!configuration.commentCharacter.isEmpty()) {
tokenizer.add(TokenType.COMMENT_CHARACTER, configuration.commentCharacter);
tokenizer.add(TokenType.COMMENT, configuration.commentCharacter + ".*(?=(\n|$))");
}

if (configuration.escapeCharacter.equals(configuration.quoteCharacter)) {
tokenizer.add(TokenType.END_QUOTE, String.format("%s(?!%s)", configuration.quoteCharacter, configuration.quoteCharacter));
tokenizer.add(TokenType.ESCAPED_CHARACTER, String.format("(%s%s|%s|%s)+", configuration.quoteCharacter, configuration.quoteCharacter, configuration.valueSeparator, configuration.recordSeparator));
tokenizer.add(TokenType.TEXT, String.format("((?!%s)[^ \f%s%s])+", configuration.valueSeparator, configuration.quoteCharacter, configuration.recordSeparator));
if (!configuration.commentCharacter.isEmpty()) {
tokenizer.add(TokenType.TEXT, String.format("((?!(%s|%s))[^ \f%s%s])+", configuration.commentCharacter, configuration.valueSeparator, configuration.quoteCharacter, configuration.recordSeparator));
} else {
tokenizer.add(TokenType.TEXT, String.format("((?!%s)[^ \f%s%s])+", configuration.valueSeparator, configuration.quoteCharacter, configuration.recordSeparator));
}
} else {
tokenizer.add(TokenType.END_QUOTE, String.format("%s", configuration.quoteCharacter));
tokenizer.add(TokenType.ESCAPED_CHARACTER, String.format("(%s%s|%s%s|%s|%s)+", configuration.escapeCharacter, configuration.quoteCharacter, configuration.escapeCharacter, configuration.escapeCharacter, configuration.valueSeparator, configuration.recordSeparator));
tokenizer.add(TokenType.TEXT, String.format("((?!%s)[^ \f%s%s%s])+", configuration.valueSeparator, configuration.escapeCharacter, configuration.quoteCharacter, configuration.recordSeparator));
if (!configuration.commentCharacter.isEmpty()) {
tokenizer.add(TokenType.TEXT, String.format("((?!(%s|%s))[^ \f%s%s%s])+", configuration.commentCharacter, configuration.valueSeparator, configuration.escapeCharacter, configuration.quoteCharacter, configuration.recordSeparator));
} else {
tokenizer.add(TokenType.TEXT, String.format("((?!%s)[^ \f%s%s%s])+", configuration.valueSeparator, configuration.escapeCharacter, configuration.quoteCharacter, configuration.recordSeparator));
}
}

initialNextStateTokens = LexerState.Initial.getPossibleTokens().stream()
.map(tokenizer::getToken)
.collect(Collectors.toList());
unquotedNextStateTokens = LexerState.Unquoted.getPossibleTokens().stream()
.map(tokenizer::getToken)
.collect(Collectors.toList());
Expand All @@ -126,14 +158,14 @@ public void start(@NotNull CharSequence buffer, int startOffset, int endOffset,
this.buffer = buffer;
this.tokenStart = this.tokenEnd = startOffset;
this.bufferEnd = endOffset;
this.currentState = initialState == 0 ? LexerState.Unquoted : LexerState.Quoted;
this.currentState = LexerState.values()[initialState];
this.currentTokenType = null;
}

@Override
public int getState() {
locateToken();
return currentState == LexerState.Unquoted ? 0 : 1;
return currentState.ordinal();
}

@Nullable
Expand Down Expand Up @@ -178,6 +210,19 @@ protected void raiseFailure() {
tokenEnd = bufferEnd;
}

/**
 * Picks the token list that belongs to the lexer's current state.
 *
 * @return the initial, unquoted or quoted token list, depending on {@code currentState}
 * @throws UnhandledSwitchCaseException if the current state has no associated token list
 */
protected Collection<Tokenizer.Token<TokenType>> getCurrentTokenCollection() {
    final Collection<Tokenizer.Token<TokenType>> candidates;
    switch (currentState) {
        case Quoted:
            candidates = quotedNextStateTokens;
            break;
        case Unquoted:
            candidates = unquotedNextStateTokens;
            break;
        case Initial:
            candidates = initialNextStateTokens;
            break;
        default:
            throw new UnhandledSwitchCaseException(currentState);
    }
    return candidates;
}

protected synchronized void locateToken() {
if (currentTokenType != null) {
return;
Expand All @@ -193,7 +238,7 @@ protected synchronized void locateToken() {
tokenizer.findToken(buffer,
tokenStart,
bufferEnd,
currentState == LexerState.Unquoted ? unquotedNextStateTokens : quotedNextStateTokens,
getCurrentTokenCollection(),
null
);

Expand Down Expand Up @@ -222,8 +267,12 @@ protected synchronized void locateToken() {
currentTokenType = CsvTypes.COMMA;
break;
case TEXT:
case COMMENT_CHARACTER:
currentTokenType = CsvTypes.TEXT;
break;
case COMMENT:
currentTokenType = CsvTypes.COMMENT;
break;
case WHITESPACE:
currentTokenType = com.intellij.psi.TokenType.WHITE_SPACE;
break;
Expand Down
Expand Up @@ -102,7 +102,7 @@ public void setEditable(boolean editable) {
}

public boolean isEditable() {
return this.tableIsEditable && !this.hasErrors();
return this.tableIsEditable && !this.hasErrors() && !hasComments();
}

public CsvColumnInfoMap<PsiElement> getColumnInfoMap() {
Expand All @@ -118,6 +118,14 @@ public boolean hasErrors() {
return (columnInfoMap != null && columnInfoMap.hasErrors());
}

/**
 * Tells whether the column info map of this editor reports comments.
 *
 * @return {@code true} only if {@code isValid()} holds, a column info map is
 *         available, and that map's {@code hasComments()} returns {@code true}
 */
public boolean hasComments() {
    if (isValid()) {
        CsvColumnInfoMap infoMap = getColumnInfoMap();
        if (infoMap != null) {
            return infoMap.hasComments();
        }
    }
    return false;
}

protected Object[][] storeStateChange(Object[][] data) {
Object[][] result = this.dataManagement.addState(data);
saveChanges();
Expand Down
Expand Up @@ -24,6 +24,8 @@ public class CsvSyntaxHighlighter extends SyntaxHighlighterBase {
createTextAttributesKey("CSV_DEFAULT_STRING", DefaultLanguageHighlighterColors.STRING);
public static final TextAttributesKey ESCAPED_TEXT =
createTextAttributesKey("CSV_ESCAPED_STRING", DefaultLanguageHighlighterColors.VALID_STRING_ESCAPE);
public static final TextAttributesKey COMMENT =
createTextAttributesKey("CSV_DEFAULT_COMMENT", DefaultLanguageHighlighterColors.LINE_COMMENT);
public static final TextAttributesKey BAD_CHARACTER =
createTextAttributesKey("CSV_BAD_CHARACTER", HighlighterColors.BAD_CHARACTER);

Expand All @@ -32,6 +34,7 @@ public class CsvSyntaxHighlighter extends SyntaxHighlighterBase {
private static final TextAttributesKey[] QUOTE_KEYS = new TextAttributesKey[] {QUOTE};
private static final TextAttributesKey[] TEXT_KEYS = new TextAttributesKey[] {TEXT};
private static final TextAttributesKey[] ESCAPED_TEXT_KEYS = new TextAttributesKey[] {ESCAPED_TEXT};
private static final TextAttributesKey[] COMMENT_KEYS = new TextAttributesKey[] {COMMENT};
private static final TextAttributesKey[] EMPTY_KEYS = new TextAttributesKey[0];

private final Project myProject;
Expand All @@ -57,6 +60,8 @@ public TextAttributesKey[] getTokenHighlights(IElementType tokenType) {
return QUOTE_KEYS;
} else if (tokenType.equals(CsvTypes.TEXT)) {
return TEXT_KEYS;
} else if (tokenType.equals(CsvTypes.COMMENT)) {
return COMMENT_KEYS;
} else if (tokenType.equals(CsvTypes.ESCAPED_TEXT)) {
return ESCAPED_TEXT_KEYS;
} else if (tokenType.equals(TokenType.BAD_CHARACTER)) {
Expand Down
Expand Up @@ -27,6 +27,7 @@ public void propertyChange(PropertyChangeEvent evt) {
switch (evt.getPropertyName()) {
case "defaultEscapeCharacter":
case "defaultValueSeparator":
case "commentIndicator":
FileContentUtilCore.reparseFiles(CsvFile.this.getVirtualFile());
break;
default:
Expand Down
Expand Up @@ -36,6 +36,7 @@ public class CsvColorSettings implements ColorSettingsPage {
attributesDescriptors.add(new AttributesDescriptor("Quote", CsvSyntaxHighlighter.QUOTE));
attributesDescriptors.add(new AttributesDescriptor("Text", CsvSyntaxHighlighter.TEXT));
attributesDescriptors.add(new AttributesDescriptor("Escaped Text", CsvSyntaxHighlighter.ESCAPED_TEXT));
attributesDescriptors.add(new AttributesDescriptor("Comment", CsvSyntaxHighlighter.COMMENT));

COLUMN_HIGHLIGHT_ATTRIBUTES = new ArrayList<>();
for (int i = 0; i < MAX_COLUMN_HIGHLIGHT_COLORS; ++i) {
Expand Down
Expand Up @@ -32,6 +32,8 @@ public class CsvEditorSettings implements PersistentStateComponent<CsvEditorSett
public static final CsvEscapeCharacter ESCAPE_CHARACTER_DEFAULT = CsvEscapeCharacter.QUOTE;
public static final CsvValueSeparator VALUE_SEPARATOR_DEFAULT = CsvValueSeparator.COMMA;

public static final String COMMENT_INDICATOR_DEFAULT = "#";

private static final CsvEditorSettings STATIC_INSTANCE = new CsvEditorSettings();

public enum EditorPrio {
Expand Down Expand Up @@ -63,6 +65,7 @@ public static final class OptionSet {
@OptionTag(converter = CsvValueSeparator.CsvValueSeparatorConverter.class)
public CsvValueSeparator DEFAULT_VALUE_SEPARATOR = VALUE_SEPARATOR_DEFAULT;
public boolean KEEP_TRAILING_SPACES = false;
public String COMMENT_INDICATOR = COMMENT_INDICATOR_DEFAULT;

public OptionSet() {
EditorSettingsExternalizable editorSettingsExternalizable = EditorSettingsExternalizable.getInstance();
Expand Down Expand Up @@ -271,4 +274,16 @@ public void setKeepTrailingSpaces(boolean keepTrailingSpaces) {
public boolean getKeepTrailingSpaces() {
return getState().KEEP_TRAILING_SPACES;
}

/**
 * Stores the comment indicator (trimmed) and notifies property listeners
 * when the stored value actually changes.
 *
 * @param commentIndicator the new indicator; surrounding whitespace is removed
 *                         before it is stored. Must not be {@code null}.
 */
public void setCommentIndicator(String commentIndicator) {
    String oldValue = getCommentIndicator();
    String newValue = commentIndicator.trim();
    getState().COMMENT_INDICATOR = newValue;
    // Compare by value, and compare what is actually stored (the trimmed text):
    // a reference comparison of the raw argument reports a change even when
    // the effective setting is identical.
    if (!newValue.equals(oldValue)) {
        myPropertyChangeSupport.firePropertyChange("commentIndicator", oldValue, newValue);
    }
}

/**
 * Reads the comment indicator from the current settings state.
 *
 * @return the {@code COMMENT_INDICATOR} value of the state object
 */
public String getCommentIndicator() {
    return this.getState().COMMENT_INDICATOR;
}
}

0 comments on commit b847b35

Please sign in to comment.