Skip to content

Commit

Permalink
SONAR-10257 Added Myers diff detection
Browse files Browse the repository at this point in the history
  • Loading branch information
dbmeneses committed Feb 7, 2018
1 parent c48cd42 commit c4a26a8
Show file tree
Hide file tree
Showing 10 changed files with 101 additions and 86 deletions.
9 changes: 7 additions & 2 deletions server/sonar-server/pom.xml
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" <project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<parent> <parent>
Expand Down Expand Up @@ -143,6 +143,11 @@
<groupId>commons-lang</groupId> <groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId> <artifactId>commons-lang</artifactId>
</dependency> </dependency>
<dependency>
<groupId>com.googlecode.java-diff-utils</groupId>
<artifactId>diffutils</artifactId>
<version>1.2</version>
</dependency>
<dependency> <dependency>
<groupId>org.picocontainer</groupId> <groupId>org.picocontainer</groupId>
<artifactId>picocontainer</artifactId> <artifactId>picocontainer</artifactId>
Expand Down
Expand Up @@ -19,6 +19,7 @@
*/ */
package org.sonar.server.computation.task.projectanalysis.scm; package org.sonar.server.computation.task.projectanalysis.scm;


import java.util.LinkedHashMap;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
Expand All @@ -43,19 +44,21 @@ public static ScmInfo create(long analysisDate, Set<Integer> lines) {
return new GeneratedScmInfo(changesets); return new GeneratedScmInfo(changesets);
} }


public static ScmInfo create(long analysisDate, Set<Integer> lines, ScmInfo dbScmInfo) { public static ScmInfo create(long analysisDate, int[] matches, ScmInfo dbScmInfo) {
checkState(!lines.isEmpty(), "No changesets");

Changeset changeset = Changeset.newChangesetBuilder() Changeset changeset = Changeset.newChangesetBuilder()
.setDate(analysisDate) .setDate(analysisDate)
.build(); .build();
Map<Integer, Changeset> changesets = lines.stream()
.collect(Collectors.toMap(x -> x, i -> changeset));


dbScmInfo.getAllChangesets().entrySet().stream() Map<Integer, Changeset> dbChangesets = dbScmInfo.getAllChangesets();
.filter(e -> !lines.contains(e.getKey())) Map<Integer, Changeset> changesets = new LinkedHashMap<>(matches.length);
.forEach(e -> changesets.put(e.getKey(), e.getValue()));


for (int i = 0; i < matches.length; i++) {
if (matches[i] > 0) {
changesets.put(i + 1, dbChangesets.get(matches[i]));
} else {
changesets.put(i + 1, changeset);
}
}
return new GeneratedScmInfo(changesets); return new GeneratedScmInfo(changesets);
} }


Expand Down
Expand Up @@ -96,7 +96,7 @@ private Optional<ScmInfo> generateScmInfoForAllFile(Component file) {
return Optional.of(GeneratedScmInfo.create(analysisMetadata.getAnalysisDate(), newOrChangedLines)); return Optional.of(GeneratedScmInfo.create(analysisMetadata.getAnalysisDate(), newOrChangedLines));
} }


private ScmInfo removeAuthorAndRevision(ScmInfo info) { private static ScmInfo removeAuthorAndRevision(ScmInfo info) {
Map<Integer, Changeset> cleanedScmInfo = info.getAllChangesets().entrySet().stream() Map<Integer, Changeset> cleanedScmInfo = info.getAllChangesets().entrySet().stream()
.collect(Collectors.toMap(Map.Entry::getKey, e -> removeAuthorAndRevision(e.getValue()))); .collect(Collectors.toMap(Map.Entry::getKey, e -> removeAuthorAndRevision(e.getValue())));
return new ScmInfoImpl(cleanedScmInfo); return new ScmInfoImpl(cleanedScmInfo);
Expand All @@ -120,11 +120,9 @@ private Optional<ScmInfo> generateAndMergeDb(Component file, boolean copyFromPre
} }


// generate date for new/changed lines // generate date for new/changed lines
Set<Integer> newOrChangedLines = sourceLinesDiff.getNewOrChangedLines(file); int[] matchingLines = sourceLinesDiff.getMatchingLines(file);
if (newOrChangedLines.isEmpty()) {
return Optional.of(scmInfo); return Optional.of(GeneratedScmInfo.create(analysisMetadata.getAnalysisDate(), matchingLines, scmInfo));
}
return Optional.of(GeneratedScmInfo.create(analysisMetadata.getAnalysisDate(), newOrChangedLines, scmInfo));
} }


} }
Expand Up @@ -19,9 +19,8 @@
*/ */
package org.sonar.server.computation.task.projectanalysis.source; package org.sonar.server.computation.task.projectanalysis.source;


import java.util.Set;
import org.sonar.server.computation.task.projectanalysis.component.Component; import org.sonar.server.computation.task.projectanalysis.component.Component;


public interface SourceLinesDiff { public interface SourceLinesDiff {
Set<Integer> getNewOrChangedLines(Component component); int[] getMatchingLines(Component component);
} }
Expand Up @@ -19,9 +19,9 @@
*/ */
package org.sonar.server.computation.task.projectanalysis.source; package org.sonar.server.computation.task.projectanalysis.source;


import java.util.HashSet; import difflib.myers.MyersDiff;
import difflib.myers.PathNode;
import java.util.List; import java.util.List;
import java.util.Set;


public class SourceLinesDiffFinder { public class SourceLinesDiffFinder {


Expand All @@ -33,39 +33,42 @@ public SourceLinesDiffFinder(List<String> database, List<String> report) {
this.report = report; this.report = report;
} }


public Set<Integer> findNewOrChangedLines() { /**
return walk(0, 0, new HashSet<>()); * Creates a diff between the file in the database and the file in the report using Myers' algorithm, and links matching lines between
} * both files.

* @return an array with one entry for each line in the report. Those entries point either to a line in the database, or to 0,
private Set<Integer> walk(int r, int db, HashSet<Integer> acc) { * in which case it means the line was added.
*/
public int[] findMatchingLines() {
int[] index = new int[report.size()];


if (r >= report.size()) { int dbLine = database.size();
return acc; int reportLine = report.size();
} try {
PathNode node = MyersDiff.buildPath(database.toArray(), report.toArray());


if (db < database.size()) { while (node.prev != null) {

PathNode prevNode = node.prev;
if (report.get(r).equals(database.get(db))) {
walk(stepIndex(r), stepIndex(db), acc);
return acc;
}


List<String> remainingDatabase = database.subList(db, database.size()); if (!node.isSnake()) {
if (remainingDatabase.contains(report.get(r))) { // additions
int nextDb = db + remainingDatabase.indexOf(report.get(r)); reportLine -= (node.j - prevNode.j);
walk(r, nextDb, acc); // removals
return acc; dbLine -= (node.i - prevNode.i);
} else {
// matches
for (int i = node.i; i > prevNode.i; i--) {
index[reportLine - 1] = dbLine;
reportLine--;
dbLine--;
}
}
node = prevNode;
} }

} catch (Exception e) {
return index;
} }

return index;
acc.add(r+1);
walk(stepIndex(r), db, acc);
return acc;
}

private static int stepIndex(int r) {
return ++r;
} }


} }
Expand Up @@ -21,7 +21,6 @@


import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Set;
import org.sonar.core.hash.SourceLinesHashesComputer; import org.sonar.core.hash.SourceLinesHashesComputer;
import org.sonar.core.util.CloseableIterator; import org.sonar.core.util.CloseableIterator;
import org.sonar.db.DbClient; import org.sonar.db.DbClient;
Expand All @@ -43,24 +42,27 @@ public SourceLinesDiffImpl(DbClient dbClient, FileSourceDao fileSourceDao, Sourc
} }


@Override @Override
public Set<Integer> getNewOrChangedLines(Component component) { public int[] getMatchingLines(Component component) {


List<String> database = new ArrayList<>(); List<String> database;
try (DbSession dbSession = dbClient.openSession(false)) { try (DbSession dbSession = dbClient.openSession(false)) {
database.addAll(fileSourceDao.selectLineHashes(dbSession, component.getUuid())); database = fileSourceDao.selectLineHashes(dbSession, component.getUuid());
if (database == null) {
database = new ArrayList<>();
}
} }


List<String> report = new ArrayList<>(); List<String> report;
SourceLinesHashesComputer linesHashesComputer = new SourceLinesHashesComputer(); SourceLinesHashesComputer linesHashesComputer = new SourceLinesHashesComputer();
try (CloseableIterator<String> lineIterator = sourceLinesRepository.readLines(component)) { try (CloseableIterator<String> lineIterator = sourceLinesRepository.readLines(component)) {
while (lineIterator.hasNext()) { while (lineIterator.hasNext()) {
String line = lineIterator.next(); String line = lineIterator.next();
linesHashesComputer.addLine(line); linesHashesComputer.addLine(line);
} }
} }
report.addAll(linesHashesComputer.getLineHashes()); report = linesHashesComputer.getLineHashes();


return new SourceLinesDiffFinder(database, report).findNewOrChangedLines(); return new SourceLinesDiffFinder(database, report).findMatchingLines();


} }


Expand Down
Expand Up @@ -20,7 +20,6 @@
package org.sonar.server.computation.task.projectanalysis.scm; package org.sonar.server.computation.task.projectanalysis.scm;


import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.tngtech.java.junit.dataprovider.DataProvider; import com.tngtech.java.junit.dataprovider.DataProvider;
import com.tngtech.java.junit.dataprovider.DataProviderRunner; import com.tngtech.java.junit.dataprovider.DataProviderRunner;
import com.tngtech.java.junit.dataprovider.UseDataProvider; import com.tngtech.java.junit.dataprovider.UseDataProvider;
Expand Down Expand Up @@ -206,7 +205,7 @@ public void generate_scm_info_when_nothing_in_db_and_report_is_has_no_changesets
@Test @Test
public void generate_scm_info_for_new_and_changed_lines_when_report_is_empty() { public void generate_scm_info_for_new_and_changed_lines_when_report_is_empty() {
createDbScmInfoWithOneLine("hash"); createDbScmInfoWithOneLine("hash");
when(diff.getNewOrChangedLines(FILE)).thenReturn(ImmutableSet.of(2, 3)); when(diff.getMatchingLines(FILE)).thenReturn(new int[] {1, 0, 0});
addFileSourceInReport(3); addFileSourceInReport(3);
ScmInfo scmInfo = underTest.getScmInfo(FILE).get(); ScmInfo scmInfo = underTest.getScmInfo(FILE).get();
assertThat(scmInfo.getAllChangesets()).hasSize(3); assertThat(scmInfo.getAllChangesets()).hasSize(3);
Expand All @@ -216,7 +215,7 @@ public void generate_scm_info_for_new_and_changed_lines_when_report_is_empty() {
assertChangeset(scmInfo.getChangesetForLine(3), null, null, analysisDate.getTime()); assertChangeset(scmInfo.getChangesetForLine(3), null, null, analysisDate.getTime());


verify(dbLoader).getScmInfo(FILE); verify(dbLoader).getScmInfo(FILE);
verify(diff).getNewOrChangedLines(FILE); verify(diff).getMatchingLines(FILE);
verifyNoMoreInteractions(dbLoader); verifyNoMoreInteractions(dbLoader);
verifyZeroInteractions(sourceHashRepository); verifyZeroInteractions(sourceHashRepository);
verifyNoMoreInteractions(diff); verifyNoMoreInteractions(diff);
Expand Down
Expand Up @@ -21,7 +21,6 @@


import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Set;
import org.junit.Test; import org.junit.Test;


import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThat;
Expand All @@ -45,9 +44,9 @@ public void shouldFindNothingWhenContentAreIdentical() {
report.add("line - 3"); report.add("line - 3");
report.add("line - 4"); report.add("line - 4");


Set<Integer> diff = new SourceLinesDiffFinder(database, report).findNewOrChangedLines(); int[] diff = new SourceLinesDiffFinder(database, report).findMatchingLines();


assertThat(diff).isEmpty(); assertThat(diff).containsExactly(1, 2, 3, 4, 5);


} }


Expand Down Expand Up @@ -78,9 +77,8 @@ public void shouldFindNothingWhenContentAreIdentical2() {
report.add(" }\n"); report.add(" }\n");
report.add("}\n"); report.add("}\n");


Set<Integer> diff = new SourceLinesDiffFinder(database, report).findNewOrChangedLines(); int[] diff = new SourceLinesDiffFinder(database, report).findMatchingLines();

assertThat(diff).containsExactly(1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 5, 6, 7);
assertThat(diff).containsExactlyInAnyOrder(5, 6, 7, 8, 10, 11, 12);


} }


Expand All @@ -99,9 +97,9 @@ public void shouldDetectWhenStartingWithModifiedLines() {
report.add("line - 2"); report.add("line - 2");
report.add("line - 3"); report.add("line - 3");


Set<Integer> diff = new SourceLinesDiffFinder(database, report).findNewOrChangedLines(); int[] diff = new SourceLinesDiffFinder(database, report).findMatchingLines();


assertThat(diff).containsExactlyInAnyOrder(1, 2); assertThat(diff).containsExactly(0, 0, 3, 4);


} }


Expand All @@ -120,9 +118,9 @@ public void shouldDetectWhenEndingWithModifiedLines() {
report.add("line - 2 - modified"); report.add("line - 2 - modified");
report.add("line - 3 - modified"); report.add("line - 3 - modified");


Set<Integer> diff = new SourceLinesDiffFinder(database, report).findNewOrChangedLines(); int[] diff = new SourceLinesDiffFinder(database, report).findMatchingLines();


assertThat(diff).containsExactlyInAnyOrder(3, 4); assertThat(diff).containsExactly(1, 2, 0, 0);


} }


Expand All @@ -145,9 +143,9 @@ public void shouldDetectModifiedLinesInMiddleOfTheFile() {
report.add("line - 4"); report.add("line - 4");
report.add("line - 5"); report.add("line - 5");


Set<Integer> diff = new SourceLinesDiffFinder(database, report).findNewOrChangedLines(); int[] diff = new SourceLinesDiffFinder(database, report).findMatchingLines();


assertThat(diff).containsExactlyInAnyOrder(3, 4); assertThat(diff).containsExactly(1, 2, 0, 0, 5, 6);


} }


Expand All @@ -166,9 +164,9 @@ public void shouldDetectNewLinesAtBeginningOfFile() {
report.add("line - 1"); report.add("line - 1");
report.add("line - 2"); report.add("line - 2");


Set<Integer> diff = new SourceLinesDiffFinder(database, report).findNewOrChangedLines(); int[] diff = new SourceLinesDiffFinder(database, report).findMatchingLines();


assertThat(diff).containsExactlyInAnyOrder(1, 2); assertThat(diff).containsExactly(0, 0, 1, 2, 3);


} }


Expand All @@ -189,9 +187,9 @@ public void shouldDetectNewLinesInMiddleOfFile() {
report.add("line - 2"); report.add("line - 2");
report.add("line - 3"); report.add("line - 3");


Set<Integer> diff = new SourceLinesDiffFinder(database, report).findNewOrChangedLines(); int[] diff = new SourceLinesDiffFinder(database, report).findMatchingLines();


assertThat(diff).containsExactlyInAnyOrder(3, 4); assertThat(diff).containsExactly(1, 2, 0, 0, 3, 4);


} }


Expand All @@ -210,9 +208,9 @@ public void shouldDetectNewLinesAtEndOfFile() {
report.add("line - new"); report.add("line - new");
report.add("line - new"); report.add("line - new");


Set<Integer> diff = new SourceLinesDiffFinder(database, report).findNewOrChangedLines(); int[] diff = new SourceLinesDiffFinder(database, report).findMatchingLines();


assertThat(diff).containsExactlyInAnyOrder(4, 5); assertThat(diff).containsExactly(1, 2, 3, 0, 0);


} }


Expand All @@ -231,9 +229,9 @@ public void shouldIgnoreDeletedLinesAtEndOfFile() {
report.add("line - 1"); report.add("line - 1");
report.add("line - 2"); report.add("line - 2");


Set<Integer> diff = new SourceLinesDiffFinder(database, report).findNewOrChangedLines(); int[] diff = new SourceLinesDiffFinder(database, report).findMatchingLines();


assertThat(diff).isEmpty(); assertThat(diff).containsExactly(1, 2, 3);


} }


Expand All @@ -254,10 +252,9 @@ public void shouldIgnoreDeletedLinesInTheMiddleOfFile() {
report.add("line - 4"); report.add("line - 4");
report.add("line - 5"); report.add("line - 5");


Set<Integer> diff = new SourceLinesDiffFinder(database, report).findNewOrChangedLines(); int[] diff = new SourceLinesDiffFinder(database, report).findMatchingLines();

assertThat(diff).isEmpty();


assertThat(diff).containsExactly(1, 2, 5, 6);
} }


@Test @Test
Expand All @@ -273,9 +270,9 @@ public void shouldIgnoreDeletedLinesAtTheStartOfTheFile() {
report.add("line - 2"); report.add("line - 2");
report.add("line - 3"); report.add("line - 3");


Set<Integer> diff = new SourceLinesDiffFinder(database, report).findNewOrChangedLines(); int[] diff = new SourceLinesDiffFinder(database, report).findMatchingLines();


assertThat(diff).isEmpty(); assertThat(diff).containsExactly(3, 4);


} }


Expand Down
Expand Up @@ -82,7 +82,7 @@ public void should_find_no_diff_when_report_and_db_content_are_identical() {
setFileContentInReport(FILE_REF, CONTENT); setFileContentInReport(FILE_REF, CONTENT);


Component component = fileComponent(FILE_REF); Component component = fileComponent(FILE_REF);
assertThat(underTest.getNewOrChangedLines(component)).isEmpty(); assertThat(underTest.getMatchingLines(component)).containsExactly(1, 2, 3, 4, 5, 6, 7);


} }


Expand Down

0 comments on commit c4a26a8

Please sign in to comment.