### Basic Functions for Interactively Exploring the CORTX Metrics Stored in Pickles

In [12]:
# NOTE(review): hard-coded absolute path to one user's checkout; presumably needed so that
# the local cortx_community module (and the stats it loads) can be found -- confirm, and
# consider making this configurable.
%cd /home/johnbent/cortx/metrics
import cortx_community
import os
from github import Github
# GitHub client built from the token in the GH_OATH environment variable.
# os.environ.get returns None when the variable is unset, so Github() then receives None.
gh = Github(os.environ.get('GH_OATH'))
# Handle on the 'Seagate' GitHub organization.
stx = gh.get_organization('Seagate')
# Repo list as reported by the cortx_community helper module.
repos = cortx_community.get_repos()
# Stats object used by the cells below through .stats, get_repos(), get_latest(), get_dates().
ps = cortx_community.PersistentStats()

/home/johnbent/cortx/metrics


In [24]:
# a function which can test the progress of a running scrape_metrics.py process
def check_scan_progress(date,ps):
    """Print how many repos have `date` as the date of their latest report.

    Args:
        date: value compared (with ==) against the second element of the
            pair returned by ps.get_latest(repo).
        ps: object providing get_repos() and get_latest(repo); get_latest
            must return exactly two values.

    Returns:
        None. The summary is written to stdout.
    """
    # map() is lazy, so get_latest is still called once per repo, in order
    latest_reports = map(ps.get_latest, ps.get_repos())
    scanned = sum(1 for _stats, latest_date in latest_reports if latest_date == date)
    total = len(ps.get_repos())
    print("%d out of %d repos have been scanned" % (scanned, total))

In [14]:
# a function for comparing a field in a repo over time
# for example, if you want to see the change in innersource_committers over time, you can use this function
def compare_fields(ps,repo,field,verbose=False):
    """Print how `field` differs between its earliest and latest recorded values for `repo`.

    Dates are visited in sorted order. Dates whose stats do not contain `field`
    are skipped, so fields that only exist for part of the history (for example
    when older entries were entered by hand with fewer keys) can still be compared.

    Args:
        ps: object exposing `stats`, indexed as ps.stats[repo][date][field].
        repo: key into ps.stats.
        field: name of the metric to compare.
        verbose: when True, also print every "date -> value" pair visited.

    Returns:
        None. Results are written to stdout.

    Raises:
        KeyError: if `repo` is not present in ps.stats.
        TypeError: if the first and last values do not support `-` with each
            other (e.g. an int count on one date and a set on another).
    """
    history = ps.stats[repo]
    first = None
    last = None
    found = False
    for date in sorted(history.keys()):
        try:
            last = history[date][field]
        except KeyError:
            # this date has no value for the field; ignore it
            continue
        if not found:
            # explicit flag so a legitimately falsy/None first value is kept
            first = last
            found = True
        if verbose:
            print("%s -> %s" % (date, last))
    if not found:
        # nothing to subtract; say so instead of failing on None - None
        print("No values recorded for %s in %s" % (field, repo))
        return
    # `-` is set difference for set-valued fields and subtraction for numeric ones
    print("Difference between first and last is: %s" % (first-last))
    print("Difference between last and first is: %s" % (last-first))

In [15]:
# Compare the earliest and latest recorded 'innersource_committers' values for the 'cortx' repo
compare_fields(ps=ps,repo='cortx',field='innersource_committers',verbose=False)

Difference between first and last is: {'xahmad', 'gregnsk', 'amolkongre'}
Difference between last and first is: set()


In [16]:
# Compare the earliest and latest recorded 'external_email_addresses' values for the 'cortx' repo.
# NOTE(review): the saved output of this cell lists personal email addresses; consider
# clearing it before sharing or committing the notebook.
compare_fields(ps=ps,repo='cortx',field='external_email_addresses',verbose=False)

Difference between first and last is: {'hemant.raut407@gmail.com', 'sawake.nikhil@gmail.com'}
Difference between last and first is: {'iWangJiaXiang@outlook.com', 'pttpzp@hotmail.com', 'mikesevilla3@gmail.com', 'zhoutong12589@163.com', 'surikov01@icloud.com', 'sora@morimoto.io', 'wanghaoyu@frazil.me', 'dominic097@gmail.com', 'liangsibin@gmail.com', 'venkat.crescentian@gmail.com, dominic097@gmail.com', 'kchai@redhat.com', 'szhilkin@gmail.com', 'github@monster010.de', 'me@mko.io', 'matthew@tldus.com', 'kevin@kdecherf.com', 'renoir42@yahoo.com', 'andreas@gaupmann.net', 'james@ustc.edu.cn', 'matth@danielito.org', 'rogerio.pontes@pm.me', 'jiawei1227@pm.me', 'github@stillwater-sc.com', 'aaron@heyaaron.com', 'lenz@grimmer.com', 'contact@WilliamMcGann.com', 'caoshiwei@gmail.com', 'w.stief@gmx.net', 'brienarabella@gmail.com', 'plamen.m.petkov@gmail.com', 'i@4leaf.me', 'xiena89@gmail.com', 'my3157@hotmail.com', 'ariel.shtul@redislabs.com', 'grezboo@gmail.com', 'pingsivapong@hotmail.com', 'eric.yu

In [17]:
# Print two closed-issue metrics for a fixed snapshot date, one line per (repo, metric) pair.
targets=['issues_closed_ave_age_in_s','issues_closed']
for target in targets:
    # 'GLOBAL' is presumably the aggregate across all repos -- confirm
    for r in ['GLOBAL','cortx-ha','cortx-hare']:
        # snapshot date is hard-coded; %d requires the stored value to be numeric
        print("%s %s -> %d " % (r, target, ps.stats[r]['2020-12-29'][target]))

GLOBAL issues_closed_ave_age_in_s -> 4990879 
cortx-ha issues_closed_ave_age_in_s -> 3722564 
cortx-hare issues_closed_ave_age_in_s -> 3960006 
GLOBAL issues_closed -> 857 
cortx-ha issues_closed -> 24 
cortx-hare issues_closed -> 572 


In [25]:
# Report how many repos have 2021-01-02 as the date of their latest report
check_scan_progress('2021-01-02',ps)

19 out of 19 repos have been scanned


In [20]:
# Fetch the latest entry for 'cortx-hare'; the pair is unpacked as (stats, date)
(a,b)=ps.get_latest('cortx-hare')
# NOTE: this bare expression is evaluated but not shown -- a cell only displays its
# last expression, so only `b` appears in the output
a['issues_open']
b

'2021-01-02'

In [36]:
# Display the 'stars' value stored for GLOBAL on 2021-01-02.
# NOTE(review): the value is shown in full; it appears to be a large set of
# (GitHub login, datetime) tuples, so the saved output is long and contains usernames.
ps.stats['GLOBAL']['2021-01-02']['stars']


{('3Domse3', datetime.datetime(2020, 12, 3, 13, 16, 46)),
 ('Aar7', datetime.datetime(2020, 11, 30, 16, 12, 38)),
 ('AbhishekPawar5', datetime.datetime(2020, 6, 3, 2, 30, 15)),
 ('AndrewTsao', datetime.datetime(2020, 9, 29, 11, 12, 45)),
 ('Azure99', datetime.datetime(2020, 10, 28, 8, 39, 48)),
 ('Billjessm', datetime.datetime(2020, 10, 7, 1, 16, 23)),
 ('Bronts', datetime.datetime(2020, 10, 28, 4, 58, 24)),
 ('ByronScottJones', datetime.datetime(2020, 9, 28, 9, 37, 8)),
 ('Caron-Huang', datetime.datetime(2020, 10, 28, 15, 51, 1)),
 ('Cookiekira', datetime.datetime(2020, 10, 28, 10, 1, 27)),
 ('DNA', datetime.datetime(2020, 10, 19, 22, 37, 45)),
 ('DammianMiller', datetime.datetime(2020, 9, 25, 5, 53, 27)),
 ('DavidAlphaFox', datetime.datetime(2020, 10, 14, 2, 45, 20)),
 ('Ehekatl', datetime.datetime(2020, 9, 25, 3, 1, 48)),
 ('EricYT', datetime.datetime(2020, 10, 26, 7, 25, 49)),
 ('FYGLE', datetime.datetime(2020, 10, 10, 21, 8, 13)),
 ('FirePontiac', datetime.datetime(2020, 11, 27, 1

In [47]:
# This block is a one-time step to add historical GLOBAL data from before the scraping
# was automated. Each dN dict holds the metrics for the matching dN_date.
# NOTE: every value here is a plain integer count, whereas scraped entries can store a
# collection for the same key (e.g. 'stars' on 2021-01-02 is a set of tuples), so
# subtracting a historical value from a scraped one may fail.
d1={'innersource_participants' : 5, 'pull_requests_external' : 0, 
    'external_participants' : 0,
    'watchers' : 34, 'stars' : 19, 'forks' : 13, 'views_unique_14_days' : 106,
    'clones_count_14_days' : 38, 'clones_unique_14_days' : 4,
    'seagate_blog_referrer_uniques' : 0, 'seagate_referrer_uniques' : 0,
    'downloads_vms' : 0}
d1_date='2020-05-19'
d2={'innersource_participants' : 8, 'pull_requests_external' : 0, 
    'external_participants' : 0,
    'watchers' : 69, 'stars' : 52, 'forks' : 42, 
    'views_unique_14_days' : 86,
    'clones_count_14_days' : 15, 'clones_unique_14_days' : 6,
    'seagate_blog_referrer_uniques' : 0, 'seagate_referrer_uniques' : 0,
    'downloads_vms' : 0}
d2_date='2020-07-06'
d3={'innersource_participants' : 18, 'pull_requests_external' : 1, 
    'external_participants' : 0,
    'watchers' : 62, 'stars' : 116, 'forks' : 31, 
    'views_unique_14_days' : 1817,
    'clones_count_14_days' : 468, 'clones_unique_14_days' : 224,
    'seagate_blog_referrer_uniques' : 0, 'seagate_referrer_uniques' : 0,
    'downloads_vms' : 130}
d3_date='2020-10-07'
d4={'innersource_participants' : 18, 'pull_requests_external' : 4, 
    'external_participants' : 0,
    'watchers' : 65, 'stars' : 159, 'forks' : 45, 
    'views_unique_14_days' : 817,
    'clones_count_14_days' : 1851, 'clones_unique_14_days' : 259,
    'seagate_blog_referrer_uniques' : 0, 'seagate_referrer_uniques' : 0,
    'downloads_vms' : 363}
d4_date='2020-11-03'
# Sanity check: show the first historical record
print(d1)
# The add_stats calls are left commented out so re-running this cell does not write
# anything; presumably they were already applied once, since these four dates appear
# in ps.get_dates('GLOBAL') -- confirm before re-enabling.
#ps.add_stats(date=d1_date,repo='GLOBAL',stats=d1)
#ps.add_stats(date=d2_date,repo='GLOBAL',stats=d2)
#ps.add_stats(date=d3_date,repo='GLOBAL',stats=d3)
#ps.add_stats(date=d4_date,repo='GLOBAL',stats=d4)

{'clones_unique_14_days': 4, 'pull_requests_external': 0, 'seagate_blog_referrer_uniques': 0, 'watchers': 34, 'downloads_vms': 0, 'seagate_referrer_uniques': 0, 'views_unique_14_days': 106, 'innersource_participants': 5, 'forks': 13, 'stars': 19, 'external_participants': 0, 'clones_count_14_days': 38}


In [42]:
# List the dates for which GLOBAL stats are stored
ps.get_dates('GLOBAL')

['2020-05-19',
 '2020-07-06',
 '2020-10-07',
 '2020-11-03',
 '2020-12-20',
 '2020-12-21',
 '2020-12-23',
 '2020-12-24',
 '2020-12-26',
 '2020-12-29',
 '2020-12-30',
 '2021-01-02']