# Working with Different Software Metric Groups

The ScrumSaga system extracts many different (100+) data metrics, with transformation alorithms for many more.  This can seem unmanageable, at first glance, but becomes intuitive after learning the various categories and sub-categories.

### Preparations
_Set-up Environment_

In [1]:
# Configure
import requests

# URI root
URL = "http://api.scrumsaga.com/v1"

# Acocunt information (must be manipulated on website: scrumsaga.com)
SAGA_ACCT = {"email":"dev.team@mgmt-tech.org","password":"********"}

# Sign-in for token
rte = "/login"
r = requests.post(URL+rte, data=SAGA_ACCT)
r.json()['msg']

'passwords match'

_Check Available Repo Data_

In [2]:
# This is the ScrumSaga user information
hdr={'Authorization': 'JWT '+r.json()["token"] }

# The /acctData route is used to view avialable repo data
rte1 = '/acctData'
r1 = requests.post(URL+rte1, headers=hdr)
r1.json()['data']

['IMTorg--testprj_Java_aSimple',
 'IMTorgTestProj--demoprj_C-_nerddinner',
 'IMTorgTestProj--demoprj_Java_HumanResourceApp',
 'IMTorgTestProj--vue-cli-todomvc']

_Load Data from Repo_ 

In [3]:
import timeit

start_time = timeit.default_timer()
# code you want to evaluate
elapsed = timeit.default_timer() - start_time

def time_usage(func):
    def wrapper(*args, **kwargs):
        start_time = timeit.default_timer()
        retval = func(*args, **kwargs)
        elapsed = timeit.default_timer() - start_time
        print(" -elapsed time: %f" % (elapsed))
        return retval
    return wrapper

In [4]:
import pandas 

class Repo:
    
    def __init__(self, namespace, email, repo_name):
        self.namespace = namespace
        self.email = email
        self.repo_name = repo_name
 

In [5]:
class Project(dict):
    
    """ container for project data """
    # system (class) config
    __acct = " token "
    __repo_name = " {'namespace':'IMTorg', 'email':'dev-team@mgmt-tech.org', 'repo':'testprj_Java_aSimple'} "
    __server = 'http://api.scrumsaga.com/v1'
    __groups = ('project', 'size', 'author', 'entity_structure', 'entity_characteristic', 'complexity', 'relation', 'error','quality','tag','process_log')
    __selection = ('current','all')
    
    
    def __init__(self, acct=__acct, repo=__repo_name):
        super( Project, self ).__init__()
                    
        # typical dict attributes minus those that are intrusive (possible removal of groups)
        def __setitem__(self, key, item):
            self.__dict__[key] = item
        def __getitem__(self, key): 
            return self.__dict__[key]
        def __len__(self): 
            return len(self.__dict__)
        def copy(self):
            return self.__dict__.copy()
        def keys(self):
            return self.__dict__.keys()
        def __cmp__(self, dict):
            return cmp(self.__dict__, dict)
        def __contains__(self, item):
            return item in self.__dict__
        def __iter__(self):
            return iter(self.__dict__)
        def __unicode__(self):
            return unicode(repr(self.__dict__)) 
        
        # instance attributes
        self.prj_acct = acct
        self.prj_repo_name = repo
        for grp in Project.__groups:
            self.__setitem__(grp, 'unavailable')
    
    # pandas integration methods
    # FIND WAY TO - apply function to all keys without creating a method
    def __repr__(self):
        return "<class {0}{1}>".format(self.__module__.__name__,self.__class__.__name__)
    
    def __dir__(self):
        return self.keys()

    def shape(self):
        tmp = list(self.keys())
        [print(i,":",self[i].shape) for i in tmp]
        
    def columns(self):
        tmp = list(self.keys())
        [print(i,": \n",self[i].columns) for i in tmp]
        
    # api methods
    # def extract_data():    
    
    @time_usage  
    def load_group (self, metric_group='project', account=__acct, repo=__repo_name, server=__server):
        route = '/load/'
        try:
            if metric_group in Project.__groups:
                url = server + route + metric_group
                headers = {'Authorization': 'JWT '+account}
                payload = {'namespace':repo.namespace, 'email':repo.email, 'repo':repo.repo_name}
                r = requests.post(url, data = payload, headers=headers)
                rec=r.json()['data']
                prec=pandas.DataFrame(rec)
                print(metric_group.upper(),"group records: ",prec.shape[1])
                return prec
            else:
                print(metric_group+" is not an available group")
        except:
            print('there was a problem')
    
    def load_all (self):
        err = []
        for grp in self.keys():
            try:
                tmp = self.load_group(grp, self.prj_acct, self.prj_repo_name)
                self.__setitem__(grp, tmp)
                self.__setattr__(grp, tmp.__getattr__)
            except:
                self.group[grp] = 'unavailable'
                err.append(grp)
        if len(err)>0:
            print("Loading completed with the following groups missing:",err)
        else:
            print("Loading completed with no errors")
    
    def extract (self, selection='current', account=__acct, repo=__repo_name, server=__server):
        route = '/extract'
        try:
            if selection in Project.__selection:
                headers = {'Authorization': 'JWT '+account}
                url = server + route
                payload = {'namespace':repo.namespace, 'email':repo.email, 'repo':repo.repo_name, 'selection':selection}
                r = requests.post(url, headers=headers, data=payload)
                print(r.json()['message'])
                return r
            else:
                print(selection+" is not an available selection")
        except:
            print('there was a problem')

    


In [6]:
import collections

class Portfolio(collections.MutableSequence):
    """ container for multiple Projects """
    __allowedType = Project

    def __init__(self, *args):
        self.allowedType = Portfolio.__allowedType
        super(Portfolio, self).__init__()        
        self._list = list()
        if args is not None:
            for arg in args:
            #    if isinstance(arg, self.allowedType):
            #        print(arg.__class__.__name__)
            #        print(self.allowedType)
                   self._list.extend(arg)
            #    else:
            #        print('not of type: ',self.allowedType)

    def check(self, v):
        if not isinstance(v.__class__.__name__, self.allowedType):
            raise TypeError(v)

    def __len__(self): return len(self._list)
    def __getitem__(self, i): return self._list[i]
    def __delitem__(self, i): del self._list[i]
    def __setitem__(self, i, v):
        #self.check(v)
        self._list[i] = v
    def insert(self, i, v):
        #self.check(v)
        self._list.insert(i, v)
    def __extend__(self, v):
        #self.check(v)
        self._list.extend(v)

In [7]:
test = Portfolio(JSimple, JHrApp)
#JSimple.__class__.__name__
#isinstance(JSimple, Project)
#type(JSimple)
#JSimple.__class__.__name__
#print(JSimple)


NameError: name 'JSimple' is not defined

In [251]:
test.append(JSimple)
test[20]

<Project>

### Simple Java Project

In [8]:
# create project
TKN = r.json()["token"]
REPO = Repo('IMTorg','dev-team@mgmt-tech.org','testprj_Java_aSimple')
JSimple = Project(TKN, REPO)

In [9]:
#JSimple.__repr__()
JSimple.__class__.__name__

'Project'

In [10]:
# load specific metric group
sz = JSimple.load_group(metric_group='size', account=TKN, repo=REPO)

SIZE group records:  14
 -elapsed time: 0.238082


In [11]:
sz.columns

Index(['count', 'files_count', 'files_size', 'hash', 'id', 'loc_add',
       'loc_del', 'loc_total', 'modified_file_count', 'original_file_count',
       'prj_id', 'project', 'stamp', 'tag_count'],
      dtype='object')

In [12]:
# load all metric groups
JSimple.load_all()

SIZE group records:  14
 -elapsed time: 0.249205
RELATION group records:  0
 -elapsed time: 0.217104
PROJECT group records:  27
 -elapsed time: 0.235256
ERROR group records:  0
 -elapsed time: 0.228542
ENTITY_STRUCTURE group records:  10
 -elapsed time: 0.459302
PROCESS_LOG group records:  5
 -elapsed time: 0.224964
AUTHOR group records:  6
 -elapsed time: 0.229809
ENTITY_CHARACTERISTIC group records:  25
 -elapsed time: 0.459292
COMPLEXITY group records:  0
 -elapsed time: 0.221575
TAG group records:  0
 -elapsed time: 0.219678
QUALITY group records:  0
 -elapsed time: 0.225189
Loading completed with no errors


In [13]:
JSimple.__repr__()

AttributeError: 'str' object has no attribute '__name__'

In [14]:
# project level attributes
JSimple.shape()

size : (14, 14)
relation : (0, 0)
project : (14, 27)
error : (0, 0)
entity_structure : (234, 10)
process_log : (3, 5)
author : (2, 6)
entity_characteristic : (128, 25)
complexity : (0, 0)
tag : (0, 0)
quality : (0, 0)


In [15]:
# access typical pandas attributes
#JSimple['author'].shape, 
#JSimple['author'].describe, 
#JSimple['author'].columns, 
#JSimple['author'].head

In [16]:
# *** GET ATTR TO SHOW WITH TAB COMPLETION
# Ref:
# https://docs.python.org/3/library/functions.html#dir
# https://goodcode.io/articles/python-dict-object/
#
#JSimple['author'].__getattr__('author_email')
#cmt.__getattr__
#JSimple['commit'].__setattr__(cmt)
#help(JSimple['commit'].__getattr__)

In [17]:
JSimple.keys()

dict_keys(['size', 'relation', 'project', 'error', 'entity_structure', 'process_log', 'author', 'entity_characteristic', 'complexity', 'tag', 'quality'])

In [18]:
JSimple.columns()

size : 
 Index(['count', 'files_count', 'files_size', 'hash', 'id', 'loc_add',
       'loc_del', 'loc_total', 'modified_file_count', 'original_file_count',
       'prj_id', 'project', 'stamp', 'tag_count'],
      dtype='object')
relation : 
 Index([], dtype='object')
project : 
 Index(['author_add', 'author_commits_count', 'author_del', 'author_files_size',
       'author_id', 'author_modified_count', 'author_original_count',
       'author_paths_count', 'author_total', 'authors_count', 'hash', 'id',
       'prj_id', 'project', 'release_count', 'reviewer_add',
       'reviewer_commits_count', 'reviewer_del', 'reviewer_files_size',
       'reviewer_modified_count', 'reviewer_name', 'reviewer_original_count',
       'reviewer_paths_count', 'reviewer_total', 'stamp', 'stamp_author',
       'subject'],
      dtype='object')
error : 
 Index([], dtype='object')
entity_structure : 
 Index(['child_of', 'child_of_id', 'created_hash', 'entity_name', 'entity_type',
       'ext', 'id', 'last_befor