Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ private static PyFile parseAs(String content, LanguageLevel languageLevel) {
}

@NotNull
private static String normalizeEol(String content) {
/**
 * Normalizes line endings in {@code content}: Windows ("\r\n") and
 * old-Mac ("\r") terminators are both converted to a single "\n".
 */
public static String normalizeEol(String content) {
  // Collapse CRLF pairs first, then any remaining bare CR characters.
  return content.replace("\r\n", "\n").replace('\r', '\n');
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,13 @@ public class PythonTokenLocation {
private final int endLineOffset;

public PythonTokenLocation(@NotNull PsiElement element) {
Document psiDocument = element.getContainingFile().getViewProvider().getDocument();
int startOffset = element.getTextRange().getStartOffset();
this(element.getTextRange().getStartOffset(), element.getTextRange().getEndOffset(), element.getContainingFile().getViewProvider().getDocument());
}

public PythonTokenLocation(int startOffset, int endOffset, Document psiDocument) {
startLine = psiDocument.getLineNumber(startOffset);
int startLineNumberOffset = psiDocument.getLineStartOffset(startLine);
startLineOffset = startOffset - startLineNumberOffset;
int endOffset = element.getTextRange().getEndOffset();
endLine = psiDocument.getLineNumber(endOffset);
int endLineNumberOffset = psiDocument.getLineStartOffset(endLine);
endLineOffset = endOffset - endLineNumberOffset;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ private void scanFile(InputFile inputFile) {
try {
visitorContext = new PythonVisitorContext(parser.parse(fileContent), pythonFile);
pyFile = new org.sonar.python.frontend.PythonParser().parse(fileContent);
saveMeasures(inputFile, visitorContext, pyFile);
saveMeasures(inputFile, visitorContext, pyFile, fileContent);
} catch (RecognitionException e) {
visitorContext = new PythonVisitorContext(pythonFile, e);
LOG.error("Unable to parse file: " + inputFile.toString());
Expand Down Expand Up @@ -162,12 +162,12 @@ private static NewIssueLocation newLocation(InputFile inputFile, NewIssue issue,
return newLocation;
}

private void saveMeasures(InputFile inputFile, PythonVisitorContext visitorContext, PyFile pyFile) {
private void saveMeasures(InputFile inputFile, PythonVisitorContext visitorContext, PyFile pyFile, String fileContent) {
boolean ignoreHeaderComments = new PythonConfiguration(context.fileSystem().encoding()).getIgnoreHeaderComments();
FileMetrics fileMetrics = new FileMetrics(visitorContext, ignoreHeaderComments, pyFile);
MetricsVisitor metricsVisitor = fileMetrics.metricsVisitor();

cpdAnalyzer.pushCpdTokens(inputFile, visitorContext);
cpdAnalyzer.pushCpdTokens(inputFile, pyFile, fileContent);
noSonarFilter.noSonarInFile(inputFile, metricsVisitor.getLinesWithNoSonar());

Set<Integer> linesOfCode = metricsVisitor.getLinesOfCode();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,56 +19,79 @@
*/
package org.sonar.plugins.python.cpd;

import com.sonar.sslr.api.AstNode;
import com.sonar.sslr.api.GenericTokenType;
import com.sonar.sslr.api.Token;
import com.sonar.sslr.api.TokenType;
import java.util.List;
import com.intellij.openapi.editor.Document;
import com.intellij.psi.PsiElement;
import com.intellij.psi.tree.IElementType;
import com.jetbrains.python.PyTokenTypes;
import com.jetbrains.python.lexer.PythonIndentingLexer;
import com.jetbrains.python.psi.PyElementType;
import com.jetbrains.python.psi.PyFile;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import javax.annotation.CheckForNull;
import org.sonar.api.batch.fs.InputFile;
import org.sonar.api.batch.sensor.SensorContext;
import org.sonar.api.batch.sensor.cpd.NewCpdTokens;
import org.sonar.python.PythonVisitorContext;
import org.sonar.python.TokenLocation;
import org.sonar.python.api.PythonTokenType;
import org.sonar.api.utils.log.Logger;
import org.sonar.api.utils.log.Loggers;
import org.sonar.python.frontend.PythonParser;
import org.sonar.python.frontend.PythonTokenLocation;

public class PythonCpdAnalyzer {

private final SensorContext context;
private static final Set<PyElementType> IGNORED_TOKEN_TYPES = new HashSet<>(Arrays.asList(
PyTokenTypes.LINE_BREAK, PyTokenTypes.DEDENT, PyTokenTypes.INDENT, PyTokenTypes.END_OF_LINE_COMMENT, PyTokenTypes.SPACE, PyTokenTypes.STATEMENT_BREAK));
private static final Logger LOG = Loggers.get(PythonCpdAnalyzer.class);

// Keeps a handle on the sensor context so pushCpdTokens() can create
// NewCpdTokens instances and register them against analyzed files.
public PythonCpdAnalyzer(SensorContext context) {
this.context = context;
}

public void pushCpdTokens(InputFile inputFile, PythonVisitorContext visitorContext) {
AstNode root = visitorContext.rootTree();
if (root != null) {
NewCpdTokens cpdTokens = context.newCpdTokens().onFile(inputFile);
List<Token> tokens = root.getTokens();
for (int i = 0; i < tokens.size(); i++) {
Token token = tokens.get(i);
TokenType currentTokenType = token.getType();
TokenType nextTokenType = i + 1 < tokens.size() ? tokens.get(i + 1).getType() : GenericTokenType.EOF;
// INDENT/DEDENT could not be completely ignored during CPD see https://docs.python.org/3/reference/lexical_analysis.html#indentation
// Just taking into account DEDENT is enough, but because the DEDENT token has an empty value, it's the
// preceding new line which is added in its place to create a difference
if (isNewLineWithIndentationChange(currentTokenType, nextTokenType) || !isIgnoredType(currentTokenType)) {
TokenLocation location = new TokenLocation(token);
cpdTokens.addToken(location.startLine(), location.startLineOffset(), location.endLine(), location.endLineOffset(), token.getValue());
/**
 * Lexes {@code fileContent} with IntelliJ's Python indenting lexer and feeds
 * the resulting tokens to SonarQube's copy-paste-detection (CPD) engine.
 *
 * @param inputFile   the SonarQube file the CPD tokens are reported against
 * @param pyFile      parsed PSI tree, used only to reach the backing Document
 * @param fileContent raw file text; EOLs are normalized before lexing so that
 *                    lexer offsets match the Document's line mapping
 */
public void pushCpdTokens(InputFile inputFile, PyFile pyFile, String fileContent) {
  Document document = getDocument(pyFile);
  if (document == null) {
    LOG.debug("Cannot complete CPD analysis: PSIDocument is null.");
    return;
  }
  PythonIndentingLexer lexer = new PythonIndentingLexer();
  lexer.start(PythonParser.normalizeEol(fileContent));
  NewCpdTokens cpdTokens = context.newCpdTokens().onFile(inputFile);
  IElementType prevTokenType = null;
  while (lexer.getTokenType() != null) {
    IElementType currentTokenType = lexer.getTokenType();
    // INDENT/DEDENT cannot be completely ignored during CPD, see
    // https://docs.python.org/3/reference/lexical_analysis.html#indentation
    // Taking DEDENT into account is enough, but because the DEDENT token has an
    // empty text, the line break that follows it is emitted in its place so the
    // indentation change still produces a token difference.
    if (isNewLineWithIndentationChange(prevTokenType, currentTokenType) || !IGNORED_TOKEN_TYPES.contains(currentTokenType)) {
      int tokenEnd = lexer.getTokenEnd();
      String tokenText = lexer.getTokenText();
      if (currentTokenType == PyTokenTypes.LINE_BREAK) {
        // A LINE_BREAK token's text may cover more than one character; collapse
        // it to a single "\n" (and a one-character span) so identical logical
        // breaks always produce identical CPD tokens.
        tokenText = "\n";
        tokenEnd = lexer.getTokenStart() + 1;
      }
      PythonTokenLocation location = new PythonTokenLocation(lexer.getTokenStart(), tokenEnd, document);
      cpdTokens.addToken(location.startLine(), location.startLineOffset(), location.endLine(), location.endLineOffset(), tokenText);
    }
    prevTokenType = currentTokenType;
    lexer.advance();
  }
  // Save exactly once, after every token has been added. The original text also
  // invoked cpdTokens.save() inside the loop (apparently left-over diff
  // residue), which would re-submit the token set on every iteration.
  cpdTokens.save();
}

private static boolean isNewLineWithIndentationChange(TokenType currentTokenType, TokenType nextTokenType) {
return currentTokenType.equals(PythonTokenType.NEWLINE) && nextTokenType.equals(PythonTokenType.DEDENT);
// True when a line break directly follows a DEDENT token. DEDENT itself has an
// empty text, so the subsequent line break is the token CPD keeps to make the
// indentation change visible.
private static boolean isNewLineWithIndentationChange(@CheckForNull IElementType prevTokenType, IElementType currentTokenType) {
  if (prevTokenType == null) {
    // First token of the file: nothing precedes it.
    return false;
  }
  return prevTokenType == PyTokenTypes.DEDENT && currentTokenType == PyTokenTypes.LINE_BREAK;
}

private static boolean isIgnoredType(TokenType type) {
return type.equals(PythonTokenType.NEWLINE) ||
type.equals(PythonTokenType.DEDENT) ||
type.equals(PythonTokenType.INDENT) ||
type.equals(GenericTokenType.EOF);
// Retrieves the IntelliJ Document backing the parsed file, or null when the
// PSI tree is empty (no first child through which to reach the containing file).
@CheckForNull
private static Document getDocument(PyFile pyFile) {
  PsiElement firstChild = pyFile.getFirstChild();
  if (firstChild != null) {
    return firstChild.getContainingFile().getViewProvider().getDocument();
  }
  return null;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
*/
package org.sonar.plugins.python.cpd;

import com.jetbrains.python.psi.PyFile;
import java.io.File;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.List;
import java.util.stream.Collectors;
Expand All @@ -32,8 +32,7 @@
import org.sonar.api.batch.sensor.internal.SensorContextTester;
import org.sonar.plugins.python.Python;
import org.sonar.plugins.python.TestUtils;
import org.sonar.python.PythonVisitorContext;
import org.sonar.python.TestPythonVisitorRunner;
import org.sonar.python.frontend.PythonParser;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.assertj.core.api.Assertions.assertThat;
Expand All @@ -46,9 +45,21 @@ public class PythonCpdAnalyzerTest {

@Test
public void code_chunks_2() {
DefaultInputFile inputFile = inputFile("code_chunks_2.py");
PythonVisitorContext visitorContext = TestPythonVisitorRunner.createContext(inputFile.path().toFile());
cpdAnalyzer.pushCpdTokens(inputFile, visitorContext);
File file = new File(BASE_DIR, "code_chunks_2.py");

String content = TestUtils.fileContent(file, UTF_8);
DefaultInputFile inputFile = TestInputFileBuilder.create("moduleKey", file.getName())
.setModuleBaseDir(Paths.get(BASE_DIR))
.setCharset(UTF_8)
.setType(InputFile.Type.MAIN)
.setLanguage(Python.KEY)
.initMetadata(content)
.build();

context.fileSystem().add(inputFile);

PyFile pyFile = new PythonParser().parse(content);
cpdAnalyzer.pushCpdTokens(inputFile, pyFile, content);

List<TokensLine> lines = context.cpdTokens("moduleKey:code_chunks_2.py");
assertThat(lines).isNotNull().hasSize(25);
Expand Down Expand Up @@ -89,19 +100,4 @@ public void code_chunks_2() {
"[itemforiteminitems]");
}

// Builds an indexed test InputFile for the given fixture name, registers it
// with the sensor context's file system, and returns it.
private DefaultInputFile inputFile(String fileName) {
  File fixture = new File(BASE_DIR, fileName);

  DefaultInputFile result =
    TestInputFileBuilder.create("moduleKey", fixture.getName())
      .setModuleBaseDir(Paths.get(BASE_DIR))
      .setCharset(UTF_8)
      .setType(InputFile.Type.MAIN)
      .setLanguage(Python.KEY)
      .initMetadata(TestUtils.fileContent(fixture, StandardCharsets.UTF_8))
      .build();

  context.fileSystem().add(result);

  return result;
}
}