In [1]:
import hashlib
import json
import os
import sys
from collections import Counter
from datetime import datetime as datingdays

import requests
from git import Repo, Git
from pytz import timezone

# Project-local modules live outside the notebook directory; extend sys.path
# before importing them.
project_root_path = '../../..'
python_lib_path = project_root_path + '/python/lib'
sys.path.append(python_lib_path)
from commit_log_parser import StatRequirementSet

def introspect(obj):
    """Print the result of several built-in inspection calls applied to ``obj``.

    One line is printed for each of ``type``, ``id``, ``dir``, ``vars`` and
    ``callable``, in that order, formatted as
    ``<builtin>(<argument name>):<TAB><TAB><result>``.

    ``vars`` raises ``TypeError`` for objects without a ``__dict__``; that
    exception is not caught here.
    """
    # The label is this function's own first parameter name.
    arg_name = introspect.__code__.co_varnames[0]
    for inspector in (type, id, dir, vars, callable):
        print("%s(%s):\t\t%s" % (inspector.__name__, arg_name, inspector(obj)))
        
def loadCachedURL(url, forceReload = False):
    """Fetch the JSON document at ``url``, going through an on-disk cache.

    The cache file is created in the current working directory and is named
    after the SHA-256 hex digest of ``url``.

    Args:
        url: Address passed to ``requests.get``.
        forceReload: When true, skip the cache lookup and always hit the network.

    Returns:
        The decoded JSON body, or ``None`` when nothing usable is cached and
        the server answers with a status other than 200.
    """
    cachedFileName = './' + hashlib.sha256(url.encode()).hexdigest()

    if not forceReload:
        try:
            with open(cachedFileName, 'r') as f:
                return json.load(f)
        except (EnvironmentError, ValueError):
            # Missing/unreadable cache file, or a cache file that does not hold
            # valid JSON (json.JSONDecodeError is a ValueError): fall through
            # and download a fresh copy instead of failing on every call.
            pass

    resp = requests.get(url)
    if resp.status_code != 200:
        return None

    body = resp.json()
    # Only cache after the body decoded successfully.
    with open(cachedFileName, 'w') as f:
        f.write(resp.text)
    return body
class Commit:
    """Plain record with ``sha``, ``date`` and ``hacker`` fields.

    The fields are class-level defaults of ``None``; instances are expected to
    overwrite them by assignment.
    """
    # All three placeholders start out unset.
    sha = date = hacker = None
class RepoName:
    """Hashable ``(owner, repo_name)`` pair, usable as a dict key or set member.

    Two instances are equal when both their ``owner`` and ``repo_name`` match.
    """
    def __init__(self, owner, repo_name):
        self.owner = owner
        self.repo_name = repo_name

    def _key(self):
        """Return the tuple that hashing and equality are based on."""
        return (self.owner, self.repo_name)

    def __hash__(self):
        return hash(self._key())

    def __eq__(self, other):
        # Comparing against an unrelated type must yield "not equal" rather
        # than an AttributeError; NotImplemented lets Python fall back.
        if not isinstance(other, RepoName):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        return not (self == other)
class Hacker:
    """A user id together with containers for that user's commits and aliases.

    Args:
        user_id: Identifier of the user; defaults to ``None`` so the object
            can be created first and filled in later.
    """
    def __init__(self, user_id=None):
        # A single constructor: Python keeps only the last ``__init__`` defined
        # in a class body, so two separate definitions cannot coexist.
        self.user_id = user_id
        self.commits = []
        self.aliases = Counter()
class Repository:
    """Pairs a repository identifier with its name."""
    def __init__(self, repo_id, repo_name):
        self.repo_id, self.repo_name = repo_id, repo_name
class MyCounter:
    """Minimal mutable integer counter.

    Args:
        init_val: Starting value; defaults to 0.
    """
    def __init__(self, init_val=0):
        # One constructor with a default replaces two ``__init__`` definitions,
        # of which only the last would have survived.
        self.counter = init_val

    def increment(self):
        """Add one to the counter."""
        self.counter += 1

    def val(self):
        """Return the current counter value."""
        return self.counter
    
# Types of queries:
#
#  Single query to derive a user ID
#  Mass commit log
class Query:
    """Builds and runs GitHub commit-search requests for one author.

    Attributes:
        urlPrefix: Base URL of the commit-search endpoint.
        startDate: Upper bound (exclusive) for ``author-date`` in the next
            request; starts at "now" in the US/Arizona zone.
        hacker: Object whose ``user_id`` is used as the ``author:`` qualifier;
            must be set before ``format_url``/``load_url`` is called.
        repos: Empty dict created by the constructor (not used in this class).
        aliases: Maps an author alias to the list of commit keys seen for it.
        token / headers: API token read from ``token_path`` and the
            ``Authorization`` header built from it.

    Args:
        token_path: File whose first line holds the API token.
    """
    def __init__(self, token_path='./web3.github.token'):
        self.urlPrefix = 'https://api.github.com/search/commits'
        self.startDate = datingdays.now(timezone('US/Arizona'))
        self.hacker = None
        self.repos = {}
        self.aliases = {}
        with open(token_path, 'r') as f:
            # strip() removes the line terminator and any stray surrounding
            # whitespace so nothing unintended ends up in the header value.
            self.token = f.readline().strip()
        self.headers = {'Authorization': 'token %s' % self.token}

    def addAlias(self, alias, commit_key):
        """Record ``commit_key`` under ``alias``, creating the list on first use."""
        self.aliases.setdefault(alias, []).append(commit_key)

    def set_last_date(self, date):
        """Move ``startDate`` back to ``date`` if ``date`` is earlier.

        Args:
            date: ISO-8601 timestamp string that carries a UTC offset.
        """
        # datetime.fromisoformat only understands a trailing "Z" from
        # Python 3.11 on; spell it as an explicit offset for older versions.
        if date.endswith('Z'):
            date = date[:-1] + '+00:00'
        new_date = datingdays.fromisoformat(date)
        if new_date < self.startDate:
            # Note: GitHub rejected dates with timezones other than "-07:00"
            # (like "+02:00"), so keep the same instant but express it in
            # startDate's own zone.
            # NOTE(review): the rejection is possibly because an unencoded "+"
            # in a query string is read as a space — confirm.
            self.startDate = new_date.astimezone(self.startDate.tzinfo)

    def format_url(self):
        """Build, print and return the search URL for the current state."""
        query = 'author:%s+author-date:<%s' % (
            self.hacker.user_id, self.startDate.isoformat())
        var = '%s?q=%s&sort=author-date&order=desc&per_page=100&page=1' % (
            self.urlPrefix, query)
        print(var)
        return var

    def load_url(self):
        """Run the search request.

        Returns:
            The decoded JSON body, or ``None`` for any status other than 200.
        """
        resp = requests.get(self.format_url(), headers=self.headers)
        if resp.status_code != 200:
            return None
        return resp.json()
    
# --- Step 1: clone/refresh every repository listed in repos.json and collect
# --- the author aliases found in its `git log --stat` output.
with open('./repos.json', "r") as r:
    array = json.load(r)
q = Query()

# Loop-invariant: every clone lives below this directory.
repo_base_dir = './repos'

for n in array:
    owner = n['owner']
    repo_name = n['repo']
    owner_dir = repo_base_dir+'/'+owner
    repo_path = owner_dir+"/"+repo_name

    if not os.path.isdir(repo_base_dir):
        print('######### Cannot find '+repo_base_dir+'  Creating it!')
    # makedirs also creates repo_base_dir when it is missing; exist_ok avoids
    # the separate check-then-create steps.
    os.makedirs(owner_dir, exist_ok=True)
    url = 'https://github.com/'+owner+'/'+repo_name+'.git'
    if not os.path.isdir(repo_path):
        Repo.clone_from(url, repo_path)
    else:
        # Existing clone: pull from its default remote.
        rp = Repo(repo_path)
        remote = rp.remote()
        remote.pull()

    print('Generating Stats for '+repo_path)
    rep = Git(repo_path)
    stat = rep.log('--stat')

    stat_req_set = StatRequirementSet()
    stat_req_set.processDocument(stat)

    # Each result entry is read for its 'commit' and 'Author' keys.
    for rae in stat_req_set.resultArray:
        commit_id = rae['commit']
        alias = rae['Author']
        q.addAlias(alias, commit_id)
print('Done loading!')
for alias, v in q.aliases.items():
    print('Alias ', alias, ' has ', len(v), ' commits')

# --- Step 2: page backwards through one author's commits via the search API.
q.hacker = Hacker('marhali')
# NOTE(review): the loop below never executes because `done` starts out True —
# presumably switched off on purpose to avoid calling the GitHub API; set it
# to False to run the query.
done = True
while not done:
    body = q.load_url()

    if body is None:
        print('Unable to load JSON')
        done = True
    else:
        total_count = body['total_count']
        incomplete_results = body['incomplete_results']
        print(total_count)
        print(incomplete_results)
        # Separate names so the repository list (`array`) and its loop
        # variable (`n`) from step 1 are not overwritten.
        items = body['items']
        if not items:
            done = True
        else:
            for item in items:
                author = item['author']
                # NOTE(review): `author` may be null in the response — confirm;
                # guard so a missing author does not raise a TypeError.
                login = author['login'] if author else None
                repo = item['repository']
                repoId = repo['id']
                repoName = repo['name']
                commit = item['commit']
                sha = item['sha']
                comAuth = commit['author']
                authorName = comAuth['name']
                authorDate = comAuth['date']
                # Lower the search bound so the next request continues with
                # older commits.
                q.set_last_date(authorDate)
                print(login, repoId, repoName, authorName, authorDate, sha)
            if total_count < 100 and not incomplete_results:
                done = True
        


Generating Stats for ./repos/Deadman-DAO/Web3HackerNetwork
Generating Stats for ./repos/enigmatt/new2Web3
Generating Stats for ./repos/oceanprotocol/aquarius
Generating Stats for ./repos/oceanprotocol/contracts
Generating Stats for ./repos/oceanprotocol/market
Generating Stats for ./repos/oceanprotocol/ocean.js
Generating Stats for ./repos/oceanprotocol/ocean.py
Generating Stats for ./repos/oceanprotocol/provider
Generating Stats for ./repos/pypa/warehouse
Done loading!
Alias  enigmatt <git@theenkes.com>  has  18  commits
Alias  DeadmanBob <github@traxel.com>  has  132  commits
Alias  enigmatt <commerce@theenkes.com>  has  5  commits
Alias  Matt Enke <commerce@theenkes.com>  has  6  commits
Alias  = <git@theenkes.com>  has  16  commits
Alias  DeadmanBob <me@example.com>  has  20  commits
Alias  rbb36 <github@traxel.com>  has  1  commits
Alias  Alex Coseru <alex.coseru@gmail.com>  has  251  commits
Alias  Călina Cenan <calina@cenan.net>  has  42  commits
Alias  David Hunt-Mateo <david@o

In [2]:
from datetime import datetime as datingdays
# Round-trip a timestamp with a numeric UTC offset through strptime/strftime
# and show both the parsed datetime and its re-formatted text.
fmt = '%Y-%m-%dT%H:%M:%S.%f%z'
t = '2020-01-28T15:47:53.000+01:00'
rslt = datingdays.strptime(t, fmt)
for shown in (rslt, rslt.strftime(fmt)):
    print(shown)


2020-01-28 15:47:53+01:00
2020-01-28T15:47:53.000000+0100


In [3]:
import urllib.parse
from github import Github
import os
#xxx = urllib.parse.parse_qs('https://api.github.com/search/commits?q=author:marhali+author-date:<2022-06-01&sort=author-date&order=desc&per_page=100&page=1')
#print(xxx)

In [4]:
from datetime import datetime as dt
from pytz import timezone
# Parse an ISO-8601 timestamp that carries a +02:00 offset; the result keeps
# that offset as its tzinfo.
d = dt.fromisoformat('2022-04-11T19:14:33.000+02:00')

# Disabled experiment: attach the US/Arizona zone with pytz's localize().
#az = timezone('US/Arizona')
#d2 = az.localize(d, is_dst=False)
#d2


In [5]:
from datetime import datetime
from datetime import timedelta
# Parse a timestamp string, shift it n hours into the past and format it again.
# Given timestamp in string
time_str = '24/7/2021 11:13:08.230010'
date_format_str = '%d/%m/%Y %H:%M:%S.%f'
# create datetime object from timestamp string
given_time = datetime.strptime(time_str, date_format_str)
print('Given Time: ', given_time)
n = 2
# Subtract n hours from datetime object
final_time = given_time - timedelta(hours=n)
# Subtraction yields an earlier time, so the label says "before" and reports
# the actual value of n.
print('Final Time (%d hours before given time ): ' % n, final_time)
# Convert datetime object to string in the same format used for parsing
final_time_str = final_time.strftime(date_format_str)
print('Final Time as string object: ', final_time_str)

Given Time:  2021-07-24 11:13:08.230010
Final Time (2 hours ahead of given time ):  2021-07-24 09:13:08.230010
Final Time as string object:  24/07/2021 09:13:08.230010
