In [1]:
import sys
sys.version_info

sys.version_info(major=3, minor=8, micro=10, releaselevel='final', serial=0)

In [2]:
import os, enum;
from abc import ABC, abstractmethod

class File:
    dir = None
    fileName = None
    fullyQualified = None
    def __init__(self, dir, fileName):
        self.dir = dir
        self.fileName = fileName
        self.fullyQualified = dir + '/' + fileName;
        
def addFiles(fileList, directory):
   for root, dirs, files in os.walk(directory):
        for file in files:
            fileList.append(File(root, file))
        for subdir in dirs:
            addFiles(fileList, subdir);
            
    
class Result(enum.Enum):
    failedMatch = 0           #didn't match - reset to zero
    matchedProgress = 1       #all requirements met - move on to the next requirement 
    oneOfManyMatches = 2      #Data gathered - keep on feeding me more lines
    gameSetMatch = 3          #Found end of data set - go spit out results
"""
    define an array of matching patterns
    Params:
    1 - Index to expect parameter 2
    2 - Space delimited match string
    3 - Data array index to capture
 - Populated value - starts as None (null)
"""
class Requirement(ABC):
    @abstractmethod
    def testline(self, line):
        pass
    @abstractmethod
    def reset(self):
        pass

class Splicer(Requirement):
    def __init__(self, matchIdx, matchValue, captureIndex, foundValue):
        self.matchIdx = matchIdx
        self.matchValue = matchValue
        self.captureIndex = captureIndex
        self.foundValue = foundValue
    @abstractmethod
    def getSpliceChar(self):
        pass
    def testline(self, line):
        sp = line.split(self.getSpliceChar())
        if (sp is not None and 
            len(sp) > self.matchIdx and 
            len(sp) > self.captureIndex and 
            self.matchValue == sp[self.matchIdx]):
            self.foundValue = sp[self.captureIndex]
            return Result.matchedProgress
        return Result.failedMatch
    def reset(self):
        self.foundValue = None
        
class SpaceSplicer(Splicer):
    def getSpliceChar(self):
        return ' '

class ColonSplicer(Splicer): #Ouch?
    def getSpliceChar(self):
        return ':'
class Blank(Requirement):
    def reset(self):
        return;
    def testline(self, line):
        if (line is None):
            print('Not sure what to: do with None-zies')
        elif (len(line.strip()) == 0):
            #blank line!
            return Result.matchedProgress
        return Result.failedMatch
class Comment(Requirement):
    def reset(self):
        self.comment = None;
    def testline(self, line):
        self.comment = line.strip()
class FileInfo:
    def __init__(self, extension):
        self.extension = extension
        self.textLineCount = 0;
        self.binByteCount= 0;
class FileCommit(Requirement):
    def __init__(self):
        self.reset()
    def reset(self):
        self.extensionDic = {}
    def testline(self, line):
        return Result.oneOfManyMatches
        
class Summary(Requirement):
    def reset(self):
        self.junk = ''
    def testline(self, line):
        sp = line.split(',')
        if (len(sp) < 2):
            print('ERROR - Summary line should have comma-separated change and insertion totals:'+line)
        else:
#442 files changed, 220745 insertions(+)
            words = sp[0].split(' ')
            if (len(words) > 0 and words[0].isnumeric()):
                self.filesChanged = int(words[0])
            words = sp[1].split(' ')
            if (len(words) > 0 and words[0].isnumeric()):
                self.insertionCount = int(words[0])
        return Result.gameSetMatch
            
class RequirementSet:
    def __init__(self):
        self.reqArray = []
        self.reqArray.append( SpaceSplicer(0, 'commit', 1, None) )
        self.reqArray.append( ColonSplicer(0, 'Author', 1, None) )
        self.reqArray.append( ColonSplicer(0, 'Date', 1, None) )
        self.reqArray.append( Blank() )
        self.reqArray.append( Comment() )
        self.reqArray.append( Blank() )
        self.reqArray.append( FileCommit() )
        self.reqArray.append( Summary() )
        self.reqIndex = 0
        self.dataMatchesFound = 0
    def reset(self):
        self.reqIndex = 0;
        self.dataMatchesFound = 0;
        for req in self.reqArray:
            req.reset();
    def processResult(self, resultStream, rslt):
        if (rslt == Result.failedMatch):
            self.reset();
            self.reqIndex = 0;
        elif (rslt == Result.matchedProgress):
            self.reqIndex += 1
            if (self.reqIndex >= len(self.reqArray)):
                print('ERROR - Last element of RequirementSet cannot return Result.matchedProgress');
                self.reset();
                self.reqIndex = 0;
        elif (rslt == Result.oneOfManyMatches):
            #Just keep reading until done
            self.dataMatchesFound += 1
        elif (rslt == Result.gameSetMatch):
            for req in reqArray:
                req.printResults(resultStream)
"""                
        else:
            print('ERROR - Unknown result type: '+str(rslt))
"""            
    def testline(self, line, resultStream):
        if (self.reqIndex >= len(self.reqArray)):
            print('ERROR - RequirementIndex out of range!')
            reset();
            self.reqIndex = 0;
            resultStream.delete()
            resultStream.close()
        else:
            result = self.reqArray[self.reqIndex].testline(line)
            self.processResult(resultStream, self.reqArray[self.reqIndex].testline(line))
                        
                

fileList = []

reqSet = RequirementSet()
addFiles(fileList, '/home/matt/Projects/Web3HackerNetwork/data/github');
print('Found '+str(len(fileList))+' files');
root = '/home/matt/Projects/Web3HackerNetwork/data/samples'
log = open(root+'/commitScan.log', 'w')
for fileClass in fileList:
    firstline = None
    fileExt = None
    linecount = 0
    commitLineIdx = 0
    commitLine = None
    commitID = None
    
    if (fileClass.fileName.endswith('.tsv')):
        print('Skipping '+fileClass.fileName)
    else:
        fq = fileClass.fullyQualified
        result = open(str(fq)+'.tsv', 'w')

        with open(str(fileClass.fullyQualified), 'r') as file:
            log.write('Reading file: '+fileName+'\n')

            for line in file:
                linecount += 1
                reqSet.testline(line, result);
            file.close()


        print(str(file.name)+' : '+str(linecount)+' lines')


IndentationError: unexpected indent (1734654736.py, line 153)