In [1]:
def introspect(obj):
  for func in [type, id, dir, vars, callable]:
        print("%s(%s):\t\t%s" % (func.__name__, introspect.__code__.co_varnames[0], func(obj)))


In [2]:
import os, enum;
from abc import ABC, abstractmethod
import sys
import traceback, inspect, json

        
class File:
    dir = None
    fileName = None
    fullyQualified = None
    def __init__(self, dir, fileName):
        self.dir = dir
        self.fileName = fileName
        self.fullyQualified = dir + '/' + fileName;
        
def addFiles(fileList, directory):
   for root, dirs, files in os.walk(directory):
        for file in files:
            fileList.append(File(root, file))
        for subdir in dirs:
            addFiles(fileList, subdir);
            
    
class Result(enum.Enum):
    failedMatch = 0           #didn't match - reset to zero
    matchedProgress = 1       #all requirements met - move on to the next requirement 
    oneOfManyMatches = 2      #Data gathered - keep on feeding me more lines
    endOfSet = 3              #contiguous set has ended - re-analyze this line
    gameSetMatch = 4          #Found end of data set - go spit out results
    lookForExtraComment = 5
"""
    define an array of matching patterns
    Params:
    1 - Index to expect parameter 2
    2 - Space delimited match string
    3 - Data array index to capture
 - Populated value - starts as None (null)
"""
class Requirement(ABC):
    @abstractmethod
    def testline(self, line):
        pass
    @abstractmethod
    def reset(self):
        pass
    @abstractmethod
    def addResults(self, dictionary):
        pass

class EndOfNumStat(Requirement):
    def testline(self, line):
        return Result.gameSetMatch
    def reset(self):
        pass
    def addResults(self, dictionary):
        pass
    
class Splicer(Requirement):
    def __init__(self, matchIdx, matchValue, captureIndex, foundValue):
        self.matchIdx = matchIdx
        self.matchValue = matchValue
        self.captureIndex = captureIndex
        self.foundValue = foundValue
    def addResults(self, dictionary):
        dictionary[self.matchValue] = self.foundValue;
    @abstractmethod
    def getSpliceChar(self):
        pass
    def testline(self, line):
        sp = line.split(self.getSpliceChar())
        if (sp is not None and 
            len(sp) > self.matchIdx and 
            len(sp) > self.captureIndex and 
            self.matchValue == sp[self.matchIdx]):
            self.foundValue = sp[self.captureIndex].strip()
            return Result.matchedProgress
        return Result.failedMatch
    def reset(self):
        self.foundValue = None
        
class SpaceSplicer(Splicer):
    def getSpliceChar(self):
        return ' '

class ColonSplicer(Splicer): #Ouch?
    def getSpliceChar(self):
        return ':'
    def testline(self, line):
        retVal = super().testline(line)
        if (retVal == Result.matchedProgress):
            if (self.matchValue == 'Date'):
                i = line.index('Date:')
                self.foundValue = line[i+5::]
                self.foundValue = line.strip()
        return retVal
    
class Blank(Requirement):
    def reset(self):
        return;
    def addResults(self, dictionary):
        return;
    def testline(self, line):
        if (line is None):
            print('Not sure what to: do with None-zies')
        elif (len(line.strip()) == 0):
            #blank line!
            return Result.matchedProgress
        return Result.failedMatch
class Comment(Requirement):
    def reset(self):
        self.comment = None;
    def addResults(self, dictionary):
        return;
    def testline(self, line):
        self.comment = line.strip()
        return Result.matchedProgress
    
class FileInfo:
    def __init__(self, extension):
        self.extension = extension
        self.textLineCount = 0;
        self.binByteCount= 0;
        self.isBinary = False
        self.inserts = 0
        self.deletes = 0
        self.occurrences = 0
def removeEmptyStrings(array):
    retVal = []
    for k in array:
        if (len(k) > 0):
            retVal.append(k)
    return retVal
def addIntValue(dictionary, key, intval):
    curVal = dictionary.get(key)
    if (curVal is None):
        curVal = 0
    dictionary[key] = curVal + intval;
class FileCommit(Requirement):
    def __init__(self):
        self.reset()
    def reset(self):
        self.extensionDic = {}
        self.foundOneOrMoreLines = False
    def addResults(self, dictionary):
        fileTypes = dictionary.get('fileTypes')
        if (fileTypes is None):
            fileTypes = {}
            dictionary['fileTypes'] = fileTypes;
        for key in self.extensionDic:
            fi = self.extensionDic.get(key)
            sumDic = fileTypes.get(fi.extension)
            if (sumDic is None):
                sumDic = {}
                fileTypes[fi.extension] = sumDic
            addIntValue(sumDic, 'textLineCount', fi.textLineCount)
            addIntValue(sumDic, 'binByteCount', fi.binByteCount)
            addIntValue(sumDic, 'inserts', fi.inserts)
            addIntValue(sumDic, 'deletes', fi.deletes)
            addIntValue(sumDic, 'occurrences', fi.occurrences)
            
    def getExt(self, ext):
        fi = self.extensionDic.get(ext)
        if (fi is None):
            fi = FileInfo(ext)
            self.extensionDic[ext] = fi
        return fi
    @abstractmethod
    def split(self, line):
        pass
    @abstractmethod
    def processStatistics(self, line, file_info, extension):
        pass
    
        
    def testline(self, line):
#        print('Testing for commit line: "'+line+'"')
        fileNamePortion, statsPortion = self.split(line)
        validData = False;
        if (fileNamePortion is not None and statsPortion is not None):
            fileNameArray = fileNamePortion.split('/');
            fileName = fileNameArray[len(fileNameArray)-1];
#            print('Filename spliced into:'+fileName)
            dotSplit = fileName.split('.')
            ext = 'noexttext'
            if (fileName.startswith('.') == False and len(dotSplit) > 1):
                ext = dotSplit[len(dotSplit)-1].strip() #last element (e.g. '.txt')
                ext = removeEmptyStrings(ext.split('}'))[0]
            fi = self.getExt(ext)
            fi.occurrences += 1
            validData = self.processStatistics(statsPortion, fi, ext)

        returnVal = Result.endOfSet
        if (validData == True):
            self.foundOneOrMoreLines = True
#            print('Returning '+str(Result.oneOfManyMatches))
            returnVal = Result.oneOfManyMatches
        elif (self.foundOneOrMoreLines == False):
            returnVal = Result.lookForExtraComment
                
        return returnVal
    
class StatFileCommit(FileCommit):
    def split(self, line):
        sp = line.split('|')
        if (len(sp) < 2):
            return None, None
        return sp[0], sp[1]

    def processStatistics(self, line, fi, ext):
        validData = False;
        size = removeEmptyStrings(line.split(' '))        
#            print('Size element array is:'+str(size))
        if (size[0].startswith('Bin')): #binary file - handle separately
            if (ext == 'noexttext'):
                fi.occurrences -= 1
                ext = 'noextbin'
                fi = self.getExt(ext)
                fi.occurrences += 1
            if (len(size) > 1):
                sizeBefore = int(size[1]) if size[1].isnumeric() else -1
                sizeAfter = int(size[3]) if size[3].isnumeric() else -1
                if (sizeBefore >= 0 and sizeAfter >= 0):
                    validData = True
                    fi.isBinary = True
                    fi.binByteCount += (sizeAfter - sizeBefore)
            else:
                validData = True
                fi.isBinary = True
        elif (size[0].isnumeric and len(size[0]) > 0):
            fi.isBinary = False
            try:
                lc = int(size[0])
                fi.textLineCount += lc
                plusCount = 0
                minusCount = 0
                if (lc < 1 and len(size) < 2):
                    #all done here
                    validData = True
                else:
                    plus = size[1].split('+')
                    for p in plus:
                        if (len(p) == 0):
                            plusCount += 1
                        else:
                            mi = len(p.split('-')) - 1
                            minusCount += mi
                        if (plusCount > 0 or minusCount > 0):
                            validData = True
                            fi.inserts = int(fi.textLineCount * ((plusCount * 1.0) / (plusCount + minusCount)))
                            fi.deletes = fi.textLineCount - fi.inserts
                        else:
                            print('No bueno!')
            except:
                print('Exception encountered parsing:', size[0])
        return validData
                        
class NumStatFileCommit(FileCommit):
    def split(self, line):
        try:
            chunks = line.split('\t')
            file_name_portion = chunks[2]
            stats_portion = chunks[0]+' '+chunks[1]
            if (chunks[0].isnumeric() or chunks[0] == '-') and (chunks[1].isnumeric() or chunks[0] == '-'):
                return file_name_portion, stats_portion
        except:
            pass
        return None, None
    def processStatistics(self, line, fi, ext):
        validData = False
        try:
            sa = line.split(' ')
            if len(sa) == 2:
                if sa[0] == '-':
                    #binary file
                    fi.isBinary = True
                    validData = True
                elif sa[0].isnumeric() and sa[1].isnumeric:
                    fi.isBinary = False
                    fi.inserts += int(sa[0])
                    fi.deletes += int(sa[1])
                    validData = True
        except:
            print('NumStatFileCommit Error parsing:',line)
        return validData
        
class Summary(Requirement):
    def __init__(self):
        self.reset()
    def reset(self):
        self.junk = ''
        self.totals = {}
    def testline(self, line):
        #print('Testing for Summary line ('+str(line)+')')
        sp = line.split(',')
        for elem in sp:
            spaceDelim = removeEmptyStrings(elem.split(' '))
            if (len(spaceDelim) > 1):
                self.totals[spaceDelim[1]] = spaceDelim[0]
            else:
                print('ERROR - Summary line should have comma-separated change and insertion totals:'+line)
        return Result.gameSetMatch
    def addResults(self, dictionary):
        dictionary.update(self.totals)

class RequirementSet:
    def processDocument(self, multiLineString):
        for line in multiLineString.splitlines():
            self.testline(line)

    def getReqArray(self):
        return self.reqArray;
    @abstractmethod
    def setup_requirements(self):
        pass
    
    def __init__(self):
        self.reqArray = []
        self.setup_requirements()
        self.reqIndex = 0
        self.dataMatchesFound = 0
        self.indexErrorDic = {}
        self.resultArray = []
        self.reset()
    def reset(self):
        self.reqIndex = 0;
        self.dataMatchesFound = 0;
        self.resultDictionary = {}
        for req in self.getReqArray():
            req.reset();
    def processResult(self, line, rslt):
#        if (rslt != Result.failedMatch):
#            print('Processing['+str(self.reqIndex)+']: '+str(rslt));
        if (rslt == Result.failedMatch):
            self.reset();
            self.reqIndex = 0;
        elif (rslt == Result.matchedProgress):
            self.reqIndex += 1
            if (self.reqIndex >= len(self.reqArray)):
                print('ERROR - Last element of RequirementSet cannot return Result.matchedProgress')
                self.reset()
                self.reqIndex = 0
        elif (rslt == Result.oneOfManyMatches):
            #Just keep reading until done
            self.dataMatchesFound += 1
#            print('Just one of many matches ('+str(self.dataMatchesFound)+' total)')
        elif (rslt == Result.endOfSet):
            self.reqIndex += 1
            self.testline(line)
        elif (rslt == Result.gameSetMatch):
#            print('Game set match!')
            for req in self.reqArray:
                req.addResults(self.resultDictionary)
            self.resultArray.append(self.resultDictionary.copy())
            self.reset()
        elif (rslt == Result.lookForExtraComment):
            self.reqIndex = 4 #Go back to the stage that
            self.testline(line)
        else:
            self.reqIndex = 0
            cnt = self.indexErrorDic.get(self.reqIndex)
            if (cnt is None):
                cnt = 0;
                print('ERROR - Unknown result type from requirement index: '+str(self.reqIndex))
            cnt += 1;
            self.indexErrorDic[self.reqIndex] = cnt;

    def testline(self, line):
        if (self.reqIndex >= len(self.reqArray)):
            print('ERROR - RequirementIndex out of range!')
            self.reset();
            self.reqIndex = 0;
            sys.exit()
        else:
            self.processResult(line, self.reqArray[self.reqIndex].testline(line))
            

        
                        
class StatRequirementSet(RequirementSet):
    def setup_requirements(self):
        reqArray = super().getReqArray()
        reqArray.append( SpaceSplicer(0, 'commit', 1, None) )
        reqArray.append( ColonSplicer(0, 'Author', 1, None) )
        reqArray.append( ColonSplicer(0, 'Date', 1, None) )
        reqArray.append( Blank() )
        reqArray.append( Comment() )
        reqArray.append( Blank() )
        reqArray.append( StatFileCommit() )
        reqArray.append( Summary() )
    def __init__(self):
        super().__init__()
        
class NumstatRequirementSet(RequirementSet):        
    def setup_requirements(self):
        reqArray = super().getReqArray()
        reqArray.append( SpaceSplicer(0, 'commit', 1, None) )
        reqArray.append( ColonSplicer(0, 'Author', 1, None) )
        reqArray.append( ColonSplicer(0, 'Date', 1, None) )
        reqArray.append( Blank() )
        reqArray.append( Comment() )
        reqArray.append( Blank() )
        reqArray.append( NumStatFileCommit() )
        reqArray.append( EndOfNumStat() )
    def __init__(self):
        super().__init__()



In [5]:
import requests
import json
import hashlib
import os
from datetime import datetime as datingdays
from git import Repo, Git
import sys
import time
project_root_path = '../../..'
python_lib_path = project_root_path + '/python/lib'
sys.path.append(python_lib_path)
#from commit_log_parser import NumstatRequirementSet
from pytz import timezone
from os.path import exists
        
def loadCachedURL(url, forceReload = False):
    body = None
    hl = hashlib.new('sha256')
    ba = bytearray(url.encode())
    hl.update(ba)
    thing = hl.hexdigest()
    cachedFileName = './'+thing
    loaded = False;
    if not forceReload:
        try:
            with open(cachedFileName, 'r') as f:
                body = json.load(f)
                f.close()
            loaded = True
        except EnvironmentError:
            pass

    if not loaded:
        resp = requests.get(url)
        if (resp.status_code == 200):
            body = resp.json()
            with open(cachedFileName, 'w') as f:
                f.write(resp.text)
                f.close()
    return body
class Commit:
    sha = None
    date = None
    hacker = None
class RepoName:
    def key(self):
        return self.owner+'/'+self.repo_name
    def __init__(self, owner, repo_name):
        self.owner = owner
        self.repo_name = repo_name
    
class Hacker:
    def toJSON(self):
        return json.dumps(self, default=lambda o: o.__dict__, 
            sort_keys=True, indent=2)    
    def __init__(self):
        self.user_id = None
        self.commits = []
        self.aliases = Counter()
    def __init__(self, user_id):
        self.user_id = user_id;
class Repository:
    def __init__(self, repo_id, repo_name):
        self.repo_id = repo_id
        self.repo_name = repo_name
class MyCounter:
    def __init__(self, init_val):
        self.counter = init_val
    def __init__(self):
        self.__init__(0)
    def increment(self):
        self.count += 1
    def val(self):
        return self.counter
    
# Types of queries:
#
#  Single query to derive a user ID
#  Mass commit log
class Query:
    def __init__(self):
        self.urlPrefix = 'https://api.github.com/search/commits'
        self.startDate = datingdays.now(timezone('US/Arizona'))
        self.hackers = {}
        self.repos = {}
        self.aliases = {}
        self.resolved_alias_map = {}
        self.commit_to_repo_map = {}
        self.json_repo_map = {}
        self.commit_cache_map = {}
        with open('./web3.github.token', 'r') as f:
            self.token = f.readline()
            self.token = self.token.strip('\n')
            self.headers = {'Authorization': 'token %s' % self.token}
    def add_alias(self, alias, commit_key):
        if (alias not in self.aliases.keys()):
            self.aliases[alias] = []
        self.aliases[alias].append(commit_key)
    
    def reset_last_date(self):
        self.startDate = datingdays.now(timezone('US/Arizona'))
    def set_last_date(self, date):
        new_date = self.startDate
        if date.endswith('Z'):
            date = date[:len(date)-2]
        try:
            new_date = datingdays.fromisoformat(date)
            if (new_date < self.startDate):
    #           Note: GitHub rejected dates with timezones other than "-07:00" (like "+02:00")
    #                 By subtracting the difference (in milliseconds?) we represent the "US/Arizona"
    #                 version of the author-date pulled from previous results
                self.startDate = self.startDate - (self.startDate - new_date)
        except:
            #Skip a bit, brother.
            #Even if this is the very last commit in this set
            # it may be repeated at the beginning of the next
            # query, but won't cause an endless loop. If it's the last
            # commit in the whole set for a particular hacker it will
            # still exit the loop due to a < 100 item result set.
            pass
    def format_user_url(self, user_id):
        var = self.urlPrefix+"?q=author:"+user_id+'+author-date:<'+self.startDate.isoformat()+'&sort=author-date&order=desc&per_page=100&page=1'
        print(var)
        return var
    def load_hacker_url(self, user_id, recurse_count=1):
        retVal = None
        resp = requests.get(self.format_user_url(user_id), headers=self.headers)
        if (resp.status_code == 200):
            retVal = resp.json()
        elif (resp.status_code == 403):
            print('Rate limit EXCEEDED.  Sleeping for a bit. (recursive_count=', recurse_count,')')
            time.sleep(recurse_count * 60)
            self.load_hacker_url(user_id, recurse_count+1)
        else:
            print('Status code returned:', resp.status_code)
            req_headers = resp.request.headers
            for n in req_headers.keys():
                print('\t', n, req_headers[n])
            print(json.dumps(resp.json(), indent=2))
        return retVal
    def preload_alias_map(self, file_name):
        if (exists(file_name)):
            with open(file_name, 'r') as af:
                self.resolved_alias_map = json.load(af)
    def add_commit_id(self, commit_id, repo_name):
        self.commit_to_repo_map[commit_id] = repo_name
    def format_id_check_url(self, commit_id):
        rn = self.commit_to_repo_map[commit_id]
        return 'https://api.github.com/repos/'+rn.owner+'/'+rn.repo_name+'/commits/'+commit_id
    def retrieve_commit(self, commit_hash):
        url = self.format_id_check_url(commit_hash)
        resp = requests.get(self.format_id_check_url(commit_hash), headers=self.headers)
        if (resp.status_code != 200):
            print('ERROR - Status code:', resp.status_code, 'encountered ', url)
            return None
        else:
            return resp
    def process_commit_response(self, resp, sha, alias, recursive=False):
        j = resp.json()
        commit_details_block = j['author']
        if (commit_details_block == None):
            commit_details_block = j['committer']
        if (commit_details_block == None):
            if not recursive and len(j['parents']) > 0:
                hash = j['parents'][0]['sha']
                self.commit_to_repo_map[hash] = self.commit_to_repo_map[sha]
                resp = self.retrieve_commit(hash)
                if (resp is not None):
                    self.process_commit_response(resp, hash, alias, True)
            else:
                print('Unable to find author node within JSON formatted result set')
        else:
            committer = commit_details_block['login']
            if (committer not in self.hackers.keys()):
                print('Creating new hacker object for '+committer+' ['+alias+']')
                self.hackers[committer] = []
            else:
                n = len(self.hackers[committer]) + 1
                print('Adding alias ['+alias+'] for user '+committer+' for a total of ', n)
            self.hackers[committer].append(alias)
            self.resolved_alias_map[alias] = committer
        
    def resolve_aliases(self):
        for alias in self.aliases.keys():
            if alias not in self.resolved_alias_map.keys():
                commit_id = q.aliases[alias][0]  #Lookup just the first one
                print('Resolving ['+alias+'] using commit ID: '+commit_id)
                resp = self.retrieve_commit(commit_id)
                if (resp != None):
                    self.process_commit_response(resp, commit_id, alias)
class RepoCounter:
    def __init__(self, repo_dict):
        self.repo_name = repo_dict['name']
        self.repo_full_name = repo_dict['full_name']
        owner = repo_dict['owner']
        self.owner = owner['login']
        self.count = 0
    def __init__(self, owner, repo):
        self.repo_name = repo
        self.repo_full_name = owner+'/'+self.repo_name
        self.owner = owner
        self.count = 0
    def add_one(self):
        self.count += 1
        
    def key(self):
        return self.repo_full_name

if 0 == 1:    
    with open('./repo_classes.json', "r") as r:
        array = json.load(r)
else:
    array = {}
    with open('./new_repo.log', 'r') as r:
        for l in r.readlines():
            key = l[:-1] #Strip off carriage return
            s = key.split('/')
            array[key] = {'owner':s[0], 'repo_name':s[1], 'repo_full_name':key, 'count':0}

q = Query()

aliasMapName = './aliasMap.json' 
if exists(aliasMapName):
    with open(aliasMapName, 'r') as r:
        q.resolved_alias_map = json.load(r)
if exists('./hackers.json'):
    with open('./hackers.json', 'r') as r:
        q.hackers = json.load(r)
elif q.resolved_alias_map is not None and len(q.resolved_alias_map) > 0:
    for alias in q.resolved_alias_map:
        user_id = q.resolved_alias_map[alias]
        if user_id not in q.hackers.keys():
            q.hackers[user_id] = []
        q.hackers[user_id].append(alias) 
    
for n in array.values():
    owner = n['owner']
    repo_name = n['repo_name']
    repo = RepoName(owner, repo_name)
    print('Processing', owner, repo_name)
    repo_base_dir = './repos'
    result_base_dir = './results'
    repo_path = repo_base_dir+'/'+repo.key()
    result_path = result_base_dir+'/'+repo.key()
    json_stats_file_name = result_path+'/commit_stat_log.json'
#    stat_req_set = StatRequirementSet()
    numstat_req_set = NumstatRequirementSet()
    last_date = datingdays.fromisoformat('1972-12-26T03:23:01.123456-07:00')

    if (os.path.isdir(repo_base_dir) == False):
        print('######### Cannot find '+repo_base_dir+'  Creating it!')
        os.makedirs(repo_base_dir)
    if os.path.isdir(result_base_dir) == False:
        print('Cannot find',result_base_dir,'Creating it!')
        os.makedirs(result_base_dir)
    if os.path.isdir(result_base_dir+'/'+owner) == False:
        os.makedirs(result_base_dir+'/'+owner)
    if os.path.isdir(result_base_dir+'/'+owner+'/'+repo_name) == False:
        os.makedirs(result_base_dir+'/'+owner+'/'+repo_name)
    if (os.path.isdir(repo_base_dir+"/"+owner) == False):
        os.makedirs(repo_base_dir+"/"+owner)
    url = 'https://github.com/'+owner+'/'+repo_name+'.git'
    if (os.path.isdir(repo_path) == False):
        Repo.clone_from(url, repo_path)
    else:
        rp = Repo(repo_path)
        remote = rp.remote()
        remote.pull()
        if exists(json_stats_file_name+'NOT NOT NOT NOT NOT'):
            with open(json_stats_file_name) as j:
                numstat_req_set.resultArray = json.load(j)
            for item in numstat_req_set.resultArray:
                q.commit_cache_map[item['commit']] = item
# Add call to rep.log('-1') to get the date from the latest change
#  If that date is less than the date on the cached stats file
#  then skip this one by loading the previous stats file.
    print('Generating Stats for '+repo_path)
    rep = Git(repo_path)
    stat = rep.log('--numstat')

    numstat_req_set.processDocument(stat)
    q.repos[repo.owner+'/'+repo.repo_name] = numstat_req_set.resultArray.copy()
    
    if repo.owner not in q.json_repo_map.keys():
        q.json_repo_map[repo.owner] = {}
    q.json_repo_map[repo.owner][repo.repo_name] = q.repos[repo.key()]
    with open(json_stats_file_name, 'w') as out:
        out.write(json.dumps(numstat_req_set.resultArray, indent=2))
    for rae in numstat_req_set.resultArray:
        commit_id = rae['commit']
        alias = rae['Author']
        q.add_alias(alias, commit_id)
        q.add_commit_id(commit_id, repo)
        
print('Done loading!')
q.resolve_aliases()

with open('./aliasMap.json', 'w') as out:
    out.write(json.dumps(q.resolved_alias_map, indent=2))
    
with open('./new_repos.json', 'w') as out:
    out.write(json.dumps(q.repos, indent=2))
with open('./hackers.json', 'w') as out:
    out.write(json.dumps(q.hackers, indent=2))
    
for alias in q.aliases.keys():
    v = q.aliases[alias]
    print('Alias ', alias, ' has ', len(v), ' commits')
    
print('How many hackers?', len(q.hackers))    
repo_counter = {}
call_count = 0
if 1 == 0:
    with open('./new_repo.log', 'w') as new_repo_log:
        for hacker in q.hackers:
            done = False
            q.reset_last_date()
            last_count = -1
            while not done:
                body = q.load_hacker_url(hacker)
                call_count += 1
                if call_count % 25 == 0:
                    print(call_count, 'rest API calls made')

                if (body == None):
                    print('Unable to load JSON')
                    done = True
                else:
                    total_count = body['total_count']
                    if (total_count == last_count):
                        print('Identical result set found.  Moving on.', total_count, last_count)
                        done = True
                    else:
                        print(total_count, 'remaining commits for user', hacker)
                    last_count = total_count
                    if total_count > 20000:
                        print('Yikes!', total_count, ' seems like a few too many')
                        done = True
                    incomplete_results = body['incomplete_results']
        #            print(total_count)
        #            print(incomplete_results)
                    array = body['items'];
                    if (array == None or len(array) < 1):
                        done = True
                    else:
                        for n in array:
                            repo = n['repository']
                            repo_full_name = repo['full_name']
                            counter = None
                            if repo_full_name not in repo_counter:
                                counter = RepoCounter(repo)
                                repo_counter[repo_full_name] = counter
                                print('New repo found!', repo_full_name)
                                new_repo_log.write(repo_full_name+'\n')
                            else:
                                counter = repo_counter[repo_full_name]
                            counter.add_one()

                            commit = n['commit']
                            comAuth = commit['author']
                            q.set_last_date(comAuth['date'])
                        if (total_count < 100 and incomplete_results == False):
                            done = True

with open('./hackers.json', 'w') as out:
    out.write(json.dumps(q.hackers, indent=2))
        
with open('./repo_classes.json', 'w') as out:
    out.write(json.dumps(repo_counter,default=lambda o: o.__dict__, 
            sort_keys=True,indent=2))
        


Processing adbenitez mailman
Generating Stats for ./repos/adbenitez/mailman
Processing akintu akintu
Generating Stats for ./repos/akintu/akintu
Processing bbci wyrm
Generating Stats for ./repos/bbci/wyrm
Processing berinhard pyp5js
Generating Stats for ./repos/berinhard/pyp5js
Processing bhrutledge jahhills.com
Generating Stats for ./repos/bhrutledge/jahhills.com
Processing Biohazard1976 pi
Generating Stats for ./repos/Biohazard1976/pi
Processing Biohazard1976 pip
Generating Stats for ./repos/Biohazard1976/pip
Processing bskinn cpython-release-feed
Generating Stats for ./repos/bskinn/cpython-release-feed
Processing bskinn flake8-bot
Generating Stats for ./repos/bskinn/flake8-bot
Processing ClearcodeHQ pytest-postgresql
Generating Stats for ./repos/ClearcodeHQ/pytest-postgresql
Processing controversial luk.ke
Generating Stats for ./repos/controversial/luk.ke
Processing Convex-Dev convex
Generating Stats for ./repos/Convex-Dev/convex
Processing datacraft-dsc starfish-py
Generating Stats 

KeyError: 'login'

In [None]:
from datetime import datetime as datingdays
fmt = '%Y-%m-%dT%H:%M:%S.%f%z'
t = '2020-01-28T15:47:53.000+01:00'
rslt = datingdays.strptime(t, fmt)
print(rslt)
print(rslt.strftime(fmt))


In [None]:
from datetime import datetime as dt
from pytz import timezone
d = dt.fromisoformat('2022-04-11T19:14:33.000+02:00')

#az = timezone('US/Arizona')
#d2 = az.localize(d, is_dst=False)
#d2              


In [None]:
from datetime import datetime
from datetime import timedelta
# Given timestamp in string
time_str = '24/7/2021 11:13:08.230010'
date_format_str = '%d/%m/%Y %H:%M:%S.%f'
# create datetime object from timestamp string
given_time = datetime.strptime(time_str, date_format_str)
print('Given Time: ', given_time)
n = 2
# Subtract 2 hours from datetime object
final_time = given_time - timedelta(hours=n)
print('Final Time (2 hours ahead of given time ): ', final_time)
# Convert datetime object to string in specific format 
final_time_str = final_time.strftime('%d/%m/%Y %H:%M:%S.%f')
print('Final Time as string object: ', final_time_str)

In [None]:
url = 'https://api.github.com/search/commit/fb5f372203f70cc7580f8e9806c00405524649d7

In [None]:
import json
print('Getting started')
rev = {}
with open('./aliasMap.json', "r") as r:
    body = json.load(r)
    for k in body.keys():
        v = body[k]
        if v not in rev.keys():
            rev[v] = []
        rev[v].append(k)
with open('./idToAliasMap.json', 'w') as w:
    w.write(json.dumps(rev, indent=2))    
print('Done!')    