# Gerrit exporter

This script tests how to extract comments for each revision from Gerrit. 

The input is the name of the Gerrit repository. 

## Configuration

Important imports

In [2]:
from requests.auth import HTTPDigestAuth
from pygerrit2 import GerritRestAPI, HTTPBasicAuth
from IPython.display import clear_output
import requests
import urllib

# debug
import pprint

In [5]:
# A bit of config - repo and filename

gerrit_url = "https://gerrit.onap.org/r"
fileName = "./gerrit_reviews.csv"

# pretty printer for json files
# used only when printing changes that have some problems
pp = pprint.PrettyPrinter(indent=2)

auth = None

# empty the file where we store the result
fileHandle = open(fileName, 'w', encoding = 'utf-8')

# header row
fileHandle.write('change_id;revision-id;filename;line;start_line;end_line;LOC;message\n')

# this line gets sets the parameters for the HTML API
rest = GerritRestAPI(url=gerrit_url, auth = auth)

# a set of parameters for the JSON API to get changes in batches of 500
start = 0                       # which batch we start from - usually 0
max_queries = 1                # how many batches we want to retrieve; for the sake of simplicity, we only do 10
queries = 0                     # counting number of queries asked
has_more = True                 # checking if there are more batches to fetch from gerrit
no_errors = 0                   # counting the number of exceptions received during batch processing

# extracting changes, processing batch by batch
# since the Gerrit API returns the changes in the batch of 500 we need to request it several times
while has_more and queries < max_queries:

    changes = rest.get("/changes/?q=status:merged&o=ALL_FILES&o=ALL_REVISIONS&o=DETAILED_LABELS&start={}".format(start), 
                   headers={'Content-Type': 'application/json'})
    queries += 1
    
    number_of_changes = len(changes)
    
    # if there are more pages of export
    # then we move on and get the next page
    # pp.pprint(changes[number_of_changes-1])
    if "_more_changes" in changes[number_of_changes-1]:
        has_more = True        
    else:
        has_more = False
            
    print("number of changes {}".format(number_of_changes))

    # here we process the changes
    for iIndex, change in enumerate(changes, start=1):
        changeID = change['id']
        
        if iIndex % 100 == 0:
            print("INFO: Extracting change: " + str(iIndex) + " of " + str(number_of_changes) + " starting at : " + str(start)) 
        
        revisions = change['revisions']

        for revID in list(revisions.keys()):
            currentComment = rest.get("/changes/{}/revisions/{}/comments".format(changeID,revID), headers={'Content-Type': 'application/json'})
            
            # not all revisions have comments, so we only look for those that have them
            if len(currentComment) > 0:                
                for oneFile, oneComment in currentComment.items():                    
                    try:
                        # this code extracts information about the comment 
                        # things like which file and which lines
                        for oneCommentItem in oneComment:
                            strFile = oneFile
                            
                            # a few if-s because not always all parameters are there
                            if 'line' in oneCommentItem:
                                strLine = oneCommentItem['line']
                            else:
                                strLine = ''

                            if 'message' in oneCommentItem:
                                strMessage = oneCommentItem['message']
                            else:
                                strMessage = ''
                            
                            # if there is a specific line and characters as comments
                            if 'range' in oneCommentItem:
                                strStartLine = oneCommentItem['range']['start_line']
                                strStartChar = oneCommentItem['range']['start_character']
                                strEndLine = oneCommentItem['range']['end_line']
                                strEndChar = oneCommentItem['range']['end_character']                        
                            else:                            
                                strStartLine = '0'
                                strStartChar = '0'
                                strEndLine = '0'
                                strEndChar = '0'

                            # if we can extract something from a file
                            # then here is where we do it
                            if strLine != '':
                                # we need the line below to properly encode the filename as URL
                                urlFileID = urllib.parse.quote_plus(strFile)
                                fileContentString = f'/changes/{changeID}/revisions/{revID}/files/{urlFileID}/content'
                                fileContent = rest.get(fileContentString, headers={'Content-Type': 'application/json'})
                                fileLines = fileContent.split("\n")

                                # if we have the lines delimitations (comment that is linked to lines)
                                if strStartLine != '0':
                                    iStartLine = int(strStartLine) - 1
                                    if strEndLine != '0':
                                        iEndLine = int(strEndLine) - 1  
                                    else: 
                                        iEndLine = len(fileLines) - 1

                                    for oneLine in fileLines[iStartLine:iEndLine]:
                                        strToCSV = str(changeID) + ";" + \
                                           str(revID) + ";" + \
                                           strFile + ";" + \
                                           str(strLine) + ";" + \
                                           str(strStartLine) + ";" + \
                                           str(strEndLine) + ";" + \
                                           oneLine.replace("\n", " _ ").replace('\r', '_').replace(';', '_') + ";" + \
                                           strMessage.replace("\n", " _ ").replace('\r', '_').replace(';', '_')
                                        fileHandle.write(strToCSV + "\n")
                                elif int(strLine) < len(fileLines):                                
                                    # and if there are no delimitation, but there is a starting line
                                    # and the starting line is below the end of the file
                                    oneLine = fileLines[int(strLine)-1]
                                    strToCSV = str(changeID) + ";" + \
                                               str(revID) + ";" + \
                                               strFile + ";" + \
                                               str(strLine) + ";" + \
                                               str(strStartLine) + ";" + \
                                               str(strEndLine) + ";" + \
                                               oneLine.replace("\n", " _ ").replace('\r', '_').replace(';', '_') + ";" + \
                                               strMessage.replace("\n", " _ ").replace('\r', '_').replace(';', '_')
                                    fileHandle.write(strToCSV + "\n")
                            else: 
                                # there is no line specified, then we take the comment for the entire file
                                for oneLine in fileLines:
                                        strToCSV = str(changeID) + ";" + \
                                           str(revID) + ";" + \
                                           strFile + ";" + \
                                           str(strLine) + ";" + \
                                           str(strStartLine) + ";" + \
                                           str(strEndLine) + ";" + \
                                           oneLine.replace("\n", " _ ").replace('\r', '_').replace(';', '_') + ";" + \
                                           strMessage.replace("\n", " _ ").replace('\r', '_').replace(';', '_')
                                        fileHandle.write(strToCSV + "\n")

                    except:
                        # this is a brutal exception handling, but we cannot check for all problems
                        # so, we ensure that we process the entire batch regardless of the errors
                        print('INFO: Unhandled exception, moving on')
                        pp.pprint(oneComment)
    
   
    if has_more:
        start += 500
            
fileHandle.close()
print("Done !!!")

number of changes 500
INFO: Unhandled exception, moving on
[ { 'author': { '_account_id': 2787,
                'email': 'liam.fallon@est.tech',
                'name': 'Liam Fallon',
                'username': 'liamfallon'},
    'id': '58b7c9c2_ab1526f5',
    'in_reply_to': '7baa98db_a7f18256',
    'line': 20,
    'message': 'Done',
    'unresolved': False,
    'updated': '2022-12-19 09:53:42.000000000'}]
INFO: Unhandled exception, moving on
[ { 'author': { '_account_id': 2787,
                'email': 'liam.fallon@est.tech',
                'name': 'Liam Fallon',
                'username': 'liamfallon'},
    'id': 'e9ae8514_744f6e1e',
    'in_reply_to': '4374c5db_e6c9c55d',
    'line': 20,
    'message': 'Done',
    'unresolved': False,
    'updated': '2022-12-19 09:53:42.000000000'}]
INFO: Unhandled exception, moving on
[ { 'author': { '_account_id': 2787,
                'email': 'liam.fallon@est.tech',
                'name': 'Liam Fallon',
                'username': 'liamfallo