In [1]:
def introspect(obj):
  for func in [type, id, dir, vars, callable]:
        print("%s(%s):\t\t%s" % (func.__name__, introspect.__code__.co_varnames[0], func(obj)))


In [2]:
import requests
import json
import hashlib
import os
from datetime import datetime as datingdays
from git import Repo, Git
import sys
import time
project_root_path = '../../..'
python_lib_path = project_root_path + '/python/lib'
sys.path.append(python_lib_path)
from commit_log_parser import StatRequirementSet
from pytz import timezone
from os.path import exists
        
def loadCachedURL(url, forceReload = False):
    body = None
    hl = hashlib.new('sha256')
    ba = bytearray(url.encode())
    hl.update(ba)
    thing = hl.hexdigest()
    cachedFileName = './'+thing
    loaded = False;
    if not forceReload:
        try:
            with open(cachedFileName, 'r') as f:
                body = json.load(f)
                f.close()
            loaded = True
        except EnvironmentError:
            pass

    if not loaded:
        resp = requests.get(url)
        if (resp.status_code == 200):
            body = resp.json()
            with open(cachedFileName, 'w') as f:
                f.write(resp.text)
                f.close()
    return body
class Commit:
    sha = None
    date = None
    hacker = None
class RepoName:
    def key(self):
        return self.owner+'/'+self.repo_name
    def __init__(self, owner, repo_name):
        self.owner = owner
        self.repo_name = repo_name
    
class Hacker:
    def toJSON(self):
        return json.dumps(self, default=lambda o: o.__dict__, 
            sort_keys=True, indent=2)    
    def __init__(self):
        self.user_id = None
        self.commits = []
        self.aliases = Counter()
    def __init__(self, user_id):
        self.user_id = user_id;
class Repository:
    def __init__(self, repo_id, repo_name):
        self.repo_id = repo_id
        self.repo_name = repo_name
class MyCounter:
    def __init__(self, init_val):
        self.counter = init_val
    def __init__(self):
        self.__init__(0)
    def increment(self):
        self.count += 1
    def val(self):
        return self.counter
    
# Types of queries:
#
#  Single query to derive a user ID
#  Mass commit log
class Query:
    def __init__(self):
        self.urlPrefix = 'https://api.github.com/search/commits'
        self.startDate = datingdays.now(timezone('US/Arizona'))
        self.hackers = {}
        self.repos = {}
        self.aliases = {}
        self.resolved_alias_map = {}
        self.commit_to_repo_map = {}
        self.json_repo_map = {}
        self.commit_cache_map = {}
        with open('./web3.github.token', 'r') as f:
            self.token = f.readline()
            self.token = self.token.strip('\n')
            self.headers = {'Authorization': 'token %s' % self.token}
    def add_alias(self, alias, commit_key):
        if (alias not in self.aliases.keys()):
            self.aliases[alias] = []
        self.aliases[alias].append(commit_key)
    
    def reset_last_date(self):
        self.startDate = datingdays.now(timezone('US/Arizona'))
    def set_last_date(self, date):
        new_date = self.startDate
        if date.endswith('Z'):
            date = date[:len(date)-2]
        try:
            new_date = datingdays.fromisoformat(date)
            if (new_date < self.startDate):
    #           Note: GitHub rejected dates with timezones other than "-07:00" (like "+02:00")
    #                 By subtracting the difference (in milliseconds?) we represent the "US/Arizona"
    #                 version of the author-date pulled from previous results
                self.startDate = self.startDate - (self.startDate - new_date)
        except:
            #Skip a bit, brother.
            #Even if this is the very last commit in this set
            # it may be repeated at the beginning of the next
            # query, but won't cause an endless loop. If it's the last
            # commit in the whole set for a particular hacker it will
            # still exit the loop due to a < 100 item result set.
            pass
    def format_user_url(self, user_id):
        var = self.urlPrefix+"?q=author:"+user_id+'+author-date:<'+self.startDate.isoformat()+'&sort=author-date&order=desc&per_page=100&page=1'
        print(var)
        return var
    def load_hacker_url(self, user_id, recurse_count=1):
        retVal = None
        resp = requests.get(self.format_user_url(user_id), headers=self.headers)
        if (resp.status_code == 200):
            retVal = resp.json()
        elif (resp.status_code == 403):
            print('Rate limit EXCEEDED.  Sleeping for a bit. (recursive_count=', recurse_count,')')
            time.sleep(recurse_count * 60)
            self.load_hacker_url(user_id, recurse_count+1)
        else:
            print('Status code returned:', resp.status_code)
            req_headers = resp.request.headers
            for n in req_headers.keys():
                print('\t', n, req_headers[n])
            print(json.dumps(resp.json(), indent=2))
        return retVal
    def preload_alias_map(self, file_name):
        if (exists(file_name)):
            with open(file_name, 'r') as af:
                self.resolved_alias_map = json.load(af)
    def add_commit_id(self, commit_id, repo_name):
        self.commit_to_repo_map[commit_id] = repo_name
    def format_id_check_url(self, commit_id):
        rn = self.commit_to_repo_map[commit_id]
        return 'https://api.github.com/repos/'+rn.owner+'/'+rn.repo_name+'/commits/'+commit_id
    def retrieve_commit(self, commit_hash):
        url = self.format_id_check_url(commit_hash)
        resp = requests.get(self.format_id_check_url(commit_hash), headers=self.headers)
        if (resp.status_code != 200):
            print('ERROR - Status code:', resp.status_code, 'encountered ', url)
            return None
        else:
            return resp
    def process_commit_response(self, resp, sha, alias, recursive=False):
        j = resp.json()
        commit_details_block = j['author']
        if (commit_details_block == None):
            commit_details_block = j['committer']
        if (commit_details_block == None):
            if not recursive and len(j['parents']) > 0:
                hash = j['parents'][0]['sha']
                self.commit_to_repo_map[hash] = self.commit_to_repo_map[sha]
                resp = self.retrieve_commit(hash)
                if (resp is not None):
                    self.process_commit_response(resp, hash, alias, True)
            else:
                print('Unable to find author node within JSON formatted result set')
        else:
            committer = commit_details_block['login']
            if (committer not in self.hackers.keys()):
                print('Creating new hacker object for '+committer+' ['+alias+']')
                self.hackers[committer] = []
            else:
                n = len(self.hackers[committer]) + 1
                print('Adding alias ['+alias+'] for user '+committer+' for a total of ', n)
            self.hackers[committer].append(alias)
            self.resolved_alias_map[alias] = committer
        
    def resolve_aliases(self):
        for alias in self.aliases.keys():
            if alias not in self.resolved_alias_map.keys():
                commit_id = q.aliases[alias][0]  #Lookup just the first one
                print('Resolving ['+alias+'] using commit ID: '+commit_id)
                resp = self.retrieve_commit(commit_id)
                if (resp != None):
                    self.process_commit_response(resp, commit_id, alias)
class RepoCounter:
    def __init__(self, repo_dict):
        self.repo_name = repo_dict['name']
        self.repo_full_name = repo_dict['full_name']
        owner = repo_dict['owner']
        self.owner = owner['login']
        self.count = 0
                          
    def add_one(self):
        self.count += 1
        
    def key(self):
        return self.repo_full_name

    
with open('./repos.json', "r") as r:
    array = json.load(r)

q = Query()

aliasMapName = './aliasMap.json' 
if exists(aliasMapName):
    with open(aliasMapName, 'r') as r:
        q.resolved_alias_map = json.load(r)
if exists('./hackers.json'):
    with open('./hackers.json', 'r') as r:
        q.hackers = json.load(r)
elif q.resolved_alias_map is not None and len(q.resolved_alias_map) > 0:
    for alias in q.resolved_alias_map:
        user_id = q.resolved_alias_map[alias]
        if user_id not in q.hackers.keys():
            q.hackers[user_id] = []
        q.hackers[user_id].append(alias) 
    
for n in array:
    owner = n['owner']
    repo_name = n['repo']
    repo = RepoName(owner, repo_name)
    print('Processing', owner, repo_name)
    repo_base_dir = './repos'
    repo_path = repo_base_dir+'/'+repo.key()
    json_stats_file_name = repo_path+'/commit_stat_log.json'
    stat_req_set = StatRequirementSet()
    last_date = datingdays.fromisoformat('1972-12-26T03:23:01.123456-07:00')

    if (os.path.isdir(repo_base_dir) == False):
        print('######### Cannot find '+repo_base_dir+'  Creating it!')
        os.makedirs(repo_base_dir)
    if (os.path.isdir(repo_base_dir+"/"+owner) == False):
        os.makedirs(repo_base_dir+"/"+owner)
    url = 'https://github.com/'+owner+'/'+repo_name+'.git'
    if (os.path.isdir(repo_path) == False):
        Repo.clone_from(url, repo_path)
    else:
        rp = Repo(repo_path)
        remote = rp.remote()
        remote.pull()
        if exists(json_stats_file_name):
            with open(json_stats_file_name) as j:
                stat_req_set.resultArray = json.load(j)
            for item in stat_req_set.resultArray:
                q.commit_cache_map[item['commit']] = item
# Add call to rep.log('-1') to get the date from the latest change
#  If that date is less than the date on the cached stats file
#  then skip this one by loading the previous stats file.
    print('Generating Stats for '+repo_path)
    rep = Git(repo_path)
    stat = rep.log('--stat')

    stat_req_set.processDocument(stat)
    q.repos[repo.owner+'/'+repo.repo_name] = stat_req_set.resultArray.copy()
    
    if repo.owner not in q.json_repo_map.keys():
        q.json_repo_map[repo.owner] = {}
    q.json_repo_map[repo.owner][repo.repo_name] = q.repos[repo.key()]
    with open(json_stats_file_name, 'w') as out:
        out.write(json.dumps(stat_req_set.resultArray, indent=2))
    for rae in stat_req_set.resultArray:
        commit_id = rae['commit']
        alias = rae['Author']
        q.add_alias(alias, commit_id)
        q.add_commit_id(commit_id, repo)
        
print('Done loading!')
q.resolve_aliases()

with open('./aliasMap.json', 'w') as out:
    out.write(json.dumps(q.resolved_alias_map, indent=2))
    
with open('./new_repos.json', 'w') as out:
    out.write(json.dumps(q.repos, indent=2))
with open('./hackers.json', 'w') as out:
    out.write(json.dumps(q.hackers, indent=2))
    
for alias in q.aliases.keys():
    v = q.aliases[alias]
    print('Alias ', alias, ' has ', len(v), ' commits')
    
print('How many hackers?', len(q.hackers))    
repo_counter = {}
call_count = 0
with open('./new_repo.log', 'w') as new_repo_log:
    for hacker in q.hackers:
        done = False
        q.reset_last_date()
        last_count = -1
        while not done:
            body = q.load_hacker_url(hacker)
            call_count += 1
            if call_count % 25 == 0:
                print(call_count, 'rest API calls made')

            if (body == None):
                print('Unable to load JSON')
                done = True
            else:
                total_count = body['total_count']
                if (total_count == last_count):
                    print('Identical result set found.  Moving on.', total_count, last_count)
                    done = True
                else:
                    print(total_count, 'remaining commits for user', hacker)
                last_count = total_count
                if total_count > 20000:
                    print('Yikes!', total_count, ' seems like a few too many')
                    done = True
                incomplete_results = body['incomplete_results']
    #            print(total_count)
    #            print(incomplete_results)
                array = body['items'];
                if (array == None or len(array) < 1):
                    done = True
                else:
                    for n in array:
                        repo = n['repository']
                        repo_full_name = repo['full_name']
                        counter = None
                        if repo_full_name not in repo_counter:
                            counter = RepoCounter(repo)
                            repo_counter[repo_full_name] = counter
                            print('New repo found!', repo_full_name)
                            new_repo_log.write(repo_full_name+'\n')
                        else:
                            counter = repo_counter[repo_full_name]
                        counter.add_one()

                        commit = n['commit']
                        comAuth = commit['author']
                        q.set_last_date(comAuth['date'])
                    if (total_count < 100 and incomplete_results == False):
                        done = True

with open('./hackers.json', 'w') as out:
    out.write(json.dumps(q.hackers, indent=2))
        
with open('./repo_classes.json', 'w') as out:
    out.write(json.dumps(repo_counter,default=lambda o: o.__dict__, 
            sort_keys=True,indent=2))
        


Processing Deadman-DAO Web3HackerNetwork
Generating Stats for ./repos/Deadman-DAO/Web3HackerNetwork
Processing enigmatt new2Web3
Generating Stats for ./repos/enigmatt/new2Web3
Processing oceanprotocol aquarius
Generating Stats for ./repos/oceanprotocol/aquarius
Processing oceanprotocol contracts
Generating Stats for ./repos/oceanprotocol/contracts
Processing oceanprotocol market
Generating Stats for ./repos/oceanprotocol/market
Processing oceanprotocol ocean.js
Generating Stats for ./repos/oceanprotocol/ocean.js
Processing oceanprotocol ocean.py
Generating Stats for ./repos/oceanprotocol/ocean.py
Processing oceanprotocol provider
Generating Stats for ./repos/oceanprotocol/provider
Processing pypa warehouse
Generating Stats for ./repos/pypa/warehouse
Done loading!
Resolving [eruizgar91 <enrique@oceanprotocol.com>] using commit ID: 6540764112ec4801cf2ecd6d9bdb121ee1d89c06
Unable to find author node within JSON formatted result set
Resolving [ClaudiaHolhos <claudia@oceanprotocol.com>] usi

In [3]:
from datetime import datetime as datingdays
fmt = '%Y-%m-%dT%H:%M:%S.%f%z'
t = '2020-01-28T15:47:53.000+01:00'
rslt = datingdays.strptime(t, fmt)
print(rslt)
print(rslt.strftime(fmt))


2020-01-28 15:47:53+01:00
2020-01-28T15:47:53.000000+0100


In [4]:
from datetime import datetime as dt
from pytz import timezone
d = dt.fromisoformat('2022-04-11T19:14:33.000+02:00')

#az = timezone('US/Arizona')
#d2 = az.localize(d, is_dst=False)
#d2              


In [5]:
from datetime import datetime
from datetime import timedelta
# Given timestamp in string
time_str = '24/7/2021 11:13:08.230010'
date_format_str = '%d/%m/%Y %H:%M:%S.%f'
# create datetime object from timestamp string
given_time = datetime.strptime(time_str, date_format_str)
print('Given Time: ', given_time)
n = 2
# Subtract 2 hours from datetime object
final_time = given_time - timedelta(hours=n)
print('Final Time (2 hours ahead of given time ): ', final_time)
# Convert datetime object to string in specific format 
final_time_str = final_time.strftime('%d/%m/%Y %H:%M:%S.%f')
print('Final Time as string object: ', final_time_str)

Given Time:  2021-07-24 11:13:08.230010
Final Time (2 hours ahead of given time ):  2021-07-24 09:13:08.230010
Final Time as string object:  24/07/2021 09:13:08.230010


In [6]:
url = 'https://api.github.com/search/commit/fb5f372203f70cc7580f8e9806c00405524649d7

SyntaxError: EOL while scanning string literal (836935326.py, line 1)

In [None]:
import json
print('Getting started')
rev = {}
with open('./aliasMap.json', "r") as r:
    body = json.load(r)
    for k in body.keys():
        v = body[k]
        if v not in rev.keys():
            rev[v] = []
        rev[v].append(k)
with open('./idToAliasMap.json', 'w') as w:
    w.write(json.dumps(rev, indent=2))    
print('Done!')    