Skip to content

Commit e8860ef

Browse files
lunnyzeripath
authored andcommitted
Some refactor on git diff and ignore getting commit information failed on migrating pull request review comments (#9996)
* Some refactor on git diff and ignore getting commit information failed on migrating pull request review comments * fix test * fix lint * Change error log to warn
1 parent 1019913 commit e8860ef

File tree

7 files changed

+356
-321
lines changed

7 files changed

+356
-321
lines changed

modules/git/diff.go

Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,260 @@
1+
// Copyright 2020 The Gitea Authors. All rights reserved.
2+
// Use of this source code is governed by a MIT-style
3+
// license that can be found in the LICENSE file.
4+
5+
package git
6+
7+
import (
8+
"bufio"
9+
"bytes"
10+
"context"
11+
"fmt"
12+
"io"
13+
"os/exec"
14+
"regexp"
15+
"strconv"
16+
"strings"
17+
18+
"code.gitea.io/gitea/modules/process"
19+
)
20+
21+
// RawDiffType type of a raw diff.
22+
type RawDiffType string
23+
24+
// RawDiffType possible values.
25+
const (
26+
RawDiffNormal RawDiffType = "diff"
27+
RawDiffPatch RawDiffType = "patch"
28+
)
29+
30+
// GetRawDiff dumps diff results of repository in given commit ID to io.Writer.
31+
func GetRawDiff(repoPath, commitID string, diffType RawDiffType, writer io.Writer) error {
32+
return GetRawDiffForFile(repoPath, "", commitID, diffType, "", writer)
33+
}
34+
35+
// GetRawDiffForFile dumps diff results of file in given commit ID to io.Writer.
36+
func GetRawDiffForFile(repoPath, startCommit, endCommit string, diffType RawDiffType, file string, writer io.Writer) error {
37+
repo, err := OpenRepository(repoPath)
38+
if err != nil {
39+
return fmt.Errorf("OpenRepository: %v", err)
40+
}
41+
defer repo.Close()
42+
43+
return GetRepoRawDiffForFile(repo, startCommit, endCommit, diffType, file, writer)
44+
}
45+
46+
// GetRepoRawDiffForFile dumps diff results of file in given commit ID to io.Writer according given repository
47+
func GetRepoRawDiffForFile(repo *Repository, startCommit, endCommit string, diffType RawDiffType, file string, writer io.Writer) error {
48+
commit, err := repo.GetCommit(endCommit)
49+
if err != nil {
50+
return fmt.Errorf("GetCommit: %v", err)
51+
}
52+
fileArgs := make([]string, 0)
53+
if len(file) > 0 {
54+
fileArgs = append(fileArgs, "--", file)
55+
}
56+
// FIXME: graceful: These commands should have a timeout
57+
ctx, cancel := context.WithCancel(DefaultContext)
58+
defer cancel()
59+
60+
var cmd *exec.Cmd
61+
switch diffType {
62+
case RawDiffNormal:
63+
if len(startCommit) != 0 {
64+
cmd = exec.CommandContext(ctx, GitExecutable, append([]string{"diff", "-M", startCommit, endCommit}, fileArgs...)...)
65+
} else if commit.ParentCount() == 0 {
66+
cmd = exec.CommandContext(ctx, GitExecutable, append([]string{"show", endCommit}, fileArgs...)...)
67+
} else {
68+
c, _ := commit.Parent(0)
69+
cmd = exec.CommandContext(ctx, GitExecutable, append([]string{"diff", "-M", c.ID.String(), endCommit}, fileArgs...)...)
70+
}
71+
case RawDiffPatch:
72+
if len(startCommit) != 0 {
73+
query := fmt.Sprintf("%s...%s", endCommit, startCommit)
74+
cmd = exec.CommandContext(ctx, GitExecutable, append([]string{"format-patch", "--no-signature", "--stdout", "--root", query}, fileArgs...)...)
75+
} else if commit.ParentCount() == 0 {
76+
cmd = exec.CommandContext(ctx, GitExecutable, append([]string{"format-patch", "--no-signature", "--stdout", "--root", endCommit}, fileArgs...)...)
77+
} else {
78+
c, _ := commit.Parent(0)
79+
query := fmt.Sprintf("%s...%s", endCommit, c.ID.String())
80+
cmd = exec.CommandContext(ctx, GitExecutable, append([]string{"format-patch", "--no-signature", "--stdout", query}, fileArgs...)...)
81+
}
82+
default:
83+
return fmt.Errorf("invalid diffType: %s", diffType)
84+
}
85+
86+
stderr := new(bytes.Buffer)
87+
88+
cmd.Dir = repo.Path
89+
cmd.Stdout = writer
90+
cmd.Stderr = stderr
91+
pid := process.GetManager().Add(fmt.Sprintf("GetRawDiffForFile: [repo_path: %s]", repo.Path), cancel)
92+
defer process.GetManager().Remove(pid)
93+
94+
if err = cmd.Run(); err != nil {
95+
return fmt.Errorf("Run: %v - %s", err, stderr)
96+
}
97+
return nil
98+
}
99+
100+
// ParseDiffHunkString parse the diffhunk content and return
101+
func ParseDiffHunkString(diffhunk string) (leftLine, leftHunk, rightLine, righHunk int) {
102+
ss := strings.Split(diffhunk, "@@")
103+
ranges := strings.Split(ss[1][1:], " ")
104+
leftRange := strings.Split(ranges[0], ",")
105+
leftLine, _ = strconv.Atoi(leftRange[0][1:])
106+
if len(leftRange) > 1 {
107+
leftHunk, _ = strconv.Atoi(leftRange[1])
108+
}
109+
if len(ranges) > 1 {
110+
rightRange := strings.Split(ranges[1], ",")
111+
rightLine, _ = strconv.Atoi(rightRange[0])
112+
if len(rightRange) > 1 {
113+
righHunk, _ = strconv.Atoi(rightRange[1])
114+
}
115+
} else {
116+
log("Parse line number failed: %v", diffhunk)
117+
rightLine = leftLine
118+
righHunk = leftHunk
119+
}
120+
return
121+
}
122+
123+
// Example: @@ -1,8 +1,9 @@ => [..., 1, 8, 1, 9]
124+
var hunkRegex = regexp.MustCompile(`^@@ -(?P<beginOld>[0-9]+)(,(?P<endOld>[0-9]+))? \+(?P<beginNew>[0-9]+)(,(?P<endNew>[0-9]+))? @@`)
125+
126+
const cmdDiffHead = "diff --git "
127+
128+
func isHeader(lof string) bool {
129+
return strings.HasPrefix(lof, cmdDiffHead) || strings.HasPrefix(lof, "---") || strings.HasPrefix(lof, "+++")
130+
}
131+
132+
// CutDiffAroundLine cuts a diff of a file in way that only the given line + numberOfLine above it will be shown
133+
// it also recalculates hunks and adds the appropriate headers to the new diff.
134+
// Warning: Only one-file diffs are allowed.
135+
func CutDiffAroundLine(originalDiff io.Reader, line int64, old bool, numbersOfLine int) string {
136+
if line == 0 || numbersOfLine == 0 {
137+
// no line or num of lines => no diff
138+
return ""
139+
}
140+
scanner := bufio.NewScanner(originalDiff)
141+
hunk := make([]string, 0)
142+
// begin is the start of the hunk containing searched line
143+
// end is the end of the hunk ...
144+
// currentLine is the line number on the side of the searched line (differentiated by old)
145+
// otherLine is the line number on the opposite side of the searched line (differentiated by old)
146+
var begin, end, currentLine, otherLine int64
147+
var headerLines int
148+
for scanner.Scan() {
149+
lof := scanner.Text()
150+
// Add header to enable parsing
151+
if isHeader(lof) {
152+
hunk = append(hunk, lof)
153+
headerLines++
154+
}
155+
if currentLine > line {
156+
break
157+
}
158+
// Detect "hunk" with contains commented lof
159+
if strings.HasPrefix(lof, "@@") {
160+
// Already got our hunk. End of hunk detected!
161+
if len(hunk) > headerLines {
162+
break
163+
}
164+
// A map with named groups of our regex to recognize them later more easily
165+
submatches := hunkRegex.FindStringSubmatch(lof)
166+
groups := make(map[string]string)
167+
for i, name := range hunkRegex.SubexpNames() {
168+
if i != 0 && name != "" {
169+
groups[name] = submatches[i]
170+
}
171+
}
172+
if old {
173+
begin, _ = strconv.ParseInt(groups["beginOld"], 10, 64)
174+
end, _ = strconv.ParseInt(groups["endOld"], 10, 64)
175+
// init otherLine with begin of opposite side
176+
otherLine, _ = strconv.ParseInt(groups["beginNew"], 10, 64)
177+
} else {
178+
begin, _ = strconv.ParseInt(groups["beginNew"], 10, 64)
179+
if groups["endNew"] != "" {
180+
end, _ = strconv.ParseInt(groups["endNew"], 10, 64)
181+
} else {
182+
end = 0
183+
}
184+
// init otherLine with begin of opposite side
185+
otherLine, _ = strconv.ParseInt(groups["beginOld"], 10, 64)
186+
}
187+
end += begin // end is for real only the number of lines in hunk
188+
// lof is between begin and end
189+
if begin <= line && end >= line {
190+
hunk = append(hunk, lof)
191+
currentLine = begin
192+
continue
193+
}
194+
} else if len(hunk) > headerLines {
195+
hunk = append(hunk, lof)
196+
// Count lines in context
197+
switch lof[0] {
198+
case '+':
199+
if !old {
200+
currentLine++
201+
} else {
202+
otherLine++
203+
}
204+
case '-':
205+
if old {
206+
currentLine++
207+
} else {
208+
otherLine++
209+
}
210+
default:
211+
currentLine++
212+
otherLine++
213+
}
214+
}
215+
}
216+
217+
// No hunk found
218+
if currentLine == 0 {
219+
return ""
220+
}
221+
// headerLines + hunkLine (1) = totalNonCodeLines
222+
if len(hunk)-headerLines-1 <= numbersOfLine {
223+
// No need to cut the hunk => return existing hunk
224+
return strings.Join(hunk, "\n")
225+
}
226+
var oldBegin, oldNumOfLines, newBegin, newNumOfLines int64
227+
if old {
228+
oldBegin = currentLine
229+
newBegin = otherLine
230+
} else {
231+
oldBegin = otherLine
232+
newBegin = currentLine
233+
}
234+
// headers + hunk header
235+
newHunk := make([]string, headerLines)
236+
// transfer existing headers
237+
copy(newHunk, hunk[:headerLines])
238+
// transfer last n lines
239+
newHunk = append(newHunk, hunk[len(hunk)-numbersOfLine-1:]...)
240+
// calculate newBegin, ... by counting lines
241+
for i := len(hunk) - 1; i >= len(hunk)-numbersOfLine; i-- {
242+
switch hunk[i][0] {
243+
case '+':
244+
newBegin--
245+
newNumOfLines++
246+
case '-':
247+
oldBegin--
248+
oldNumOfLines++
249+
default:
250+
oldBegin--
251+
newBegin--
252+
newNumOfLines++
253+
oldNumOfLines++
254+
}
255+
}
256+
// construct the new hunk header
257+
newHunk[headerLines] = fmt.Sprintf("@@ -%d,%d +%d,%d @@",
258+
oldBegin, oldNumOfLines, newBegin, newNumOfLines)
259+
return strings.Join(newHunk, "\n")
260+
}

modules/git/diff_test.go

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
// Copyright 2020 The Gitea Authors. All rights reserved.
2+
// Use of this source code is governed by a MIT-style
3+
// license that can be found in the LICENSE file.
4+
5+
package git
6+
7+
import (
8+
"strings"
9+
"testing"
10+
11+
"github.com/stretchr/testify/assert"
12+
)
13+
14+
const exampleDiff = `diff --git a/README.md b/README.md
15+
--- a/README.md
16+
+++ b/README.md
17+
@@ -1,3 +1,6 @@
18+
# gitea-github-migrator
19+
+
20+
+ Build Status
21+
- Latest Release
22+
Docker Pulls
23+
+ cut off
24+
+ cut off`
25+
26+
func TestCutDiffAroundLine(t *testing.T) {
27+
result := CutDiffAroundLine(strings.NewReader(exampleDiff), 4, false, 3)
28+
resultByLine := strings.Split(result, "\n")
29+
assert.Len(t, resultByLine, 7)
30+
// Check if headers got transferred
31+
assert.Equal(t, "diff --git a/README.md b/README.md", resultByLine[0])
32+
assert.Equal(t, "--- a/README.md", resultByLine[1])
33+
assert.Equal(t, "+++ b/README.md", resultByLine[2])
34+
// Check if hunk header is calculated correctly
35+
assert.Equal(t, "@@ -2,2 +3,2 @@", resultByLine[3])
36+
// Check if line got transferred
37+
assert.Equal(t, "+ Build Status", resultByLine[4])
38+
39+
// Must be same result as before since old line 3 == new line 5
40+
newResult := CutDiffAroundLine(strings.NewReader(exampleDiff), 3, true, 3)
41+
assert.Equal(t, result, newResult, "Must be same result as before since old line 3 == new line 5")
42+
43+
newResult = CutDiffAroundLine(strings.NewReader(exampleDiff), 6, false, 300)
44+
assert.Equal(t, exampleDiff, newResult)
45+
46+
emptyResult := CutDiffAroundLine(strings.NewReader(exampleDiff), 6, false, 0)
47+
assert.Empty(t, emptyResult)
48+
49+
// Line is out of scope
50+
emptyResult = CutDiffAroundLine(strings.NewReader(exampleDiff), 434, false, 0)
51+
assert.Empty(t, emptyResult)
52+
}
53+
54+
func BenchmarkCutDiffAroundLine(b *testing.B) {
55+
for n := 0; n < b.N; n++ {
56+
CutDiffAroundLine(strings.NewReader(exampleDiff), 3, true, 3)
57+
}
58+
}
59+
60+
func ExampleCutDiffAroundLine() {
61+
const diff = `diff --git a/README.md b/README.md
62+
--- a/README.md
63+
+++ b/README.md
64+
@@ -1,3 +1,6 @@
65+
# gitea-github-migrator
66+
+
67+
+ Build Status
68+
- Latest Release
69+
Docker Pulls
70+
+ cut off
71+
+ cut off`
72+
result := CutDiffAroundLine(strings.NewReader(diff), 4, false, 3)
73+
println(result)
74+
}
75+
76+
func TestParseDiffHunkString(t *testing.T) {
77+
leftLine, leftHunk, rightLine, rightHunk := ParseDiffHunkString("@@ -19,3 +19,5 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER")
78+
assert.EqualValues(t, 19, leftLine)
79+
assert.EqualValues(t, 3, leftHunk)
80+
assert.EqualValues(t, 19, rightLine)
81+
assert.EqualValues(t, 5, rightHunk)
82+
}

modules/migrations/gitea.go

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ import (
2828
"code.gitea.io/gitea/modules/setting"
2929
"code.gitea.io/gitea/modules/structs"
3030
"code.gitea.io/gitea/modules/timeutil"
31-
"code.gitea.io/gitea/services/gitdiff"
3231

3332
gouuid "github.com/satori/go.uuid"
3433
)
@@ -783,19 +782,22 @@ func (g *GiteaLocalUploader) CreateReviews(reviews ...*base.Review) error {
783782
}
784783

785784
for _, comment := range review.Comments {
785+
_, _, line, _ := git.ParseDiffHunkString(comment.DiffHunk)
786+
786787
headCommitID, err := g.gitRepo.GetRefCommitID(pr.GetGitRefName())
787788
if err != nil {
788789
return fmt.Errorf("GetRefCommitID[%s]: %v", pr.GetGitRefName(), err)
789790
}
791+
792+
var patch string
790793
patchBuf := new(bytes.Buffer)
791-
if err := gitdiff.GetRawDiffForFile(g.gitRepo.Path, pr.MergeBase, headCommitID, gitdiff.RawDiffNormal, comment.TreePath, patchBuf); err != nil {
792-
return fmt.Errorf("GetRawDiffForLine[%s, %s, %s, %s]: %v", g.gitRepo.Path, pr.MergeBase, headCommitID, comment.TreePath, err)
794+
if err := git.GetRepoRawDiffForFile(g.gitRepo, pr.MergeBase, headCommitID, git.RawDiffNormal, comment.TreePath, patchBuf); err != nil {
795+
// We should ignore the error since the commit maybe removed when force push to the pull request
796+
log.Warn("GetRepoRawDiffForFile failed when migrating [%s, %s, %s, %s]: %v", g.gitRepo.Path, pr.MergeBase, headCommitID, comment.TreePath, err)
797+
} else {
798+
patch = git.CutDiffAroundLine(patchBuf, int64((&models.Comment{Line: int64(line + comment.Position - 1)}).UnsignedLine()), line < 0, setting.UI.CodeCommentLines)
793799
}
794800

795-
_, _, line, _ := gitdiff.ParseDiffHunkString(comment.DiffHunk)
796-
797-
patch := gitdiff.CutDiffAroundLine(patchBuf, int64((&models.Comment{Line: int64(line + comment.Position - 1)}).UnsignedLine()), line < 0, setting.UI.CodeCommentLines)
798-
799801
var c = models.Comment{
800802
Type: models.CommentTypeCode,
801803
PosterID: comment.PosterID,

routers/repo/commit.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -292,10 +292,10 @@ func Diff(ctx *context.Context) {
292292

293293
// RawDiff dumps diff results of repository in given commit ID to io.Writer
294294
func RawDiff(ctx *context.Context) {
295-
if err := gitdiff.GetRawDiff(
295+
if err := git.GetRawDiff(
296296
models.RepoPath(ctx.Repo.Owner.Name, ctx.Repo.Repository.Name),
297297
ctx.Params(":sha"),
298-
gitdiff.RawDiffType(ctx.Params(":ext")),
298+
git.RawDiffType(ctx.Params(":ext")),
299299
ctx.Resp,
300300
); err != nil {
301301
ctx.ServerError("GetRawDiff", err)

0 commit comments

Comments
 (0)