In [1]:
import pandas as pd
from anytree import Node, RenderTree
from anytree.exporter import DotExporter

In [2]:
from devanalyst.simulation.simulationModels import Random
from devanalyst.simulation.generateTimecards import IdCounter

importing Jupyter notebook from c:\users\aleja\documents\code\chateauclaudia-labs\devanalyst\devanalyst\simulation\simulationModels.ipynb
importing Jupyter notebook from c:\users\aleja\documents\code\chateauclaudia-labs\devanalyst\devanalyst\simulation\statics.ipynb
importing Jupyter notebook from c:\users\aleja\documents\code\chateauclaudia-labs\devanalyst\devanalyst\simulation\businessObjects.ipynb
importing Jupyter notebook from c:\users\aleja\documents\code\chateauclaudia-labs\devanalyst\devanalyst\simulation\generateTimecards.ipynb


<h1>Pilot of Java metadata</h1>

In [5]:
class JClass:
    # Metadata describing a single Java class.
    #
    # -package: a string with the Java package the class lives in (e.g. 'marketdata.ir'). May be empty or None for a
    #           class that has no package, in which case the qualified name is just the class name.
    # -name: a string with the simple (unqualified) class name (e.g. 'YieldCurve').
    # -parent: a string, consisting of the qualified name for the parent class from which this class derives.
    # -methods: an array of JMethod instances, all with different names, which comprise the methods of this type.
    #
    # The resulting instance exposes:
    # -qualified_name: a string providing the unique identifier for this class, in the form of a qualified name, i.e.,
    #                  of the form <package>.<class name>
    def __init__(self, package, name, parent, methods):
        self.package = package
        self.name    = name
        # Previously only 'package' was stored here and 'name' was silently dropped, so two classes in the same
        # package ended up with the same "unique" identifier.
        self.qualified_name = package + '.' + name if package else name
        self.parent  = parent
        self.methods = methods

In [6]:
class JMethod:
    # Metadata describing a single method of a JClass.
    #
    # -name: a string, identifying uniquely this method within the JClass instance to which the method belongs
    # -inputs: an array of JClass instances, corresponding to the input parameter types for the method. Could be an empty
    #  array
    # -outputs: an array of JClass instances, corresponding to the output type for the method. Could be an empty array.
    #
    # The constructor used to be spelled '_init__' (single leading underscore), so Python never ran it:
    # JMethod('x', [], []) raised a TypeError and no attribute was ever set.
    def __init__(self, name, inputs, outputs):
        self.name    = name
        self.inputs  = inputs
        self.outputs = outputs

In [7]:
def initCodeBase():
    # Builds the small pilot Java code base (a few utility, market data and instrument classes) and returns the
    # JClass instances as a list, in the order in which they are declared below.
    #
    # JClass takes the package and the simple class name as two separate arguments; the calls used to pass a single
    # pre-joined qualified name, which does not match the 4-argument constructor and raised a TypeError. The
    # objects built here also used to be discarded when the function returned, so they are now handed back to the
    # caller.
    Date        = JClass('util', 'Date', None, [])
    Double      = JClass('util', 'Double', None, [])
    Curve       = JClass('marketdata', 'Curve', None, [])
    YieldCurve  = JClass('marketdata.ir', 'YieldCurve', 'marketdata.Curve',
                         [JMethod('interpolate', [Date], [Double])])
    Instrument  = JClass('instrument', 'Instrument', None, [])
    IRSwap      = JClass('instrument.ir', 'IRSwap', 'instrument.Instrument',
                         [JMethod('computePV', [YieldCurve], [Double])])
    return [Date, Double, Curve, YieldCurve, Instrument, IRSwap]
    

<h1>Model of Blocks of Fields and Flags</h1>

In [3]:
class RefactoringModel:
    # Holds the configuration that drives simulated refactorings.
    #
    # -random: optional object used for all stochastic-related calculations. When omitted (the default), a new
    #  Random() instance is created, which is what this class always did before. Passing one in allows callers to
    #  share a single source of randomness or to substitute a deterministic one.
    def __init__(self, random = None):
        self.random            = Random() if random is None else random

In [4]:
class Field:
    # A named piece of data handled by CodeFragments.
    #
    # -name: a string. Uniquely identifies the field among all parameters of all CodeFragments.
    def __init__(self, name):
        setattr(self, 'name', name)

In [5]:
class FieldsBlock:
    # A group of fields that are handled together.
    #
    # -fields: a Python list of Field instances. The list is stored as given (it is not copied).
    def __init__(self, fields):
        setattr(self, 'fields', fields)

In [6]:
class Interface:
    # Describes the signature of an interface.
    #
    # -parameters: stored as given on the instance.
    # -output: stored as given on the instance.
    #
    # The interfaceId attribute starts out as None.
    def __init__(self, parameters, output):
        self.interfaceId = None
        self.parameters, self.output = parameters, output

In [7]:
class CodeFragment:
    # -fragmentId: a string. Uniquely identifies this CodeFragment instance among all other CodeFragments. It is
    #  None when the instance is created.
    # -parameters: list of Field instances
    # -output: list of Field instances
    # -loc: integer, corresponding to the number of lines of code for this code fragment
    # -calls: array of strings, corresponding to the fragmentIds of other CodeFragment instances that this CodeFragment
    #  instance directly invokes. It is empty by default
    # -plugins: array of strings, corresponding to the interfaceIds of Interface instances that this CodeFragment invokes
    #  (i.e., manifestations of the Dependency Inversion Principle - refer to the book "Clean Architecture" by
    #  Robert C. Martin). It is empty by default
    # -implements: array of strings, corresponding to the interfaceIds of Interface instances that this CodeFragment
    #  implements. Adherence to the Liskov Substitution Principle is assumed. Empty by default.
    # -superTypes: array of strings, corresponding to fragmentIds of CodeFragments that this CodeFragment instance
    #  derives from. Adherence to the Liskov Substitution Principle is assumed so, for example, no functionality in
    #  any parent is overwritten. Only new functionality is added (new methods, new fields). Empty by default.
    # -storyIds: array of strings, corresponding to the UserStoryId's of user stories whose behaviour require
    #  execution of the business logic in this CodeFragment instance.  Empty by default.
    #
    # The list-valued optional arguments default to None rather than to a literal []: a literal default list is
    # created once and then shared by every instance that relies on the default, so appending to one fragment's
    # 'calls' would silently change all the others. Each instance now gets its own fresh empty list.
    def __init__(self, parameters, output, loc,
                 calls = None, plugins = None, implements = None, superTypes = None, storyIds = None):
        self.fragmentId = None
        self.parameters = parameters
        self.output     = output
        self.loc        = loc
        self.calls      = [] if calls      is None else calls
        self.plugins    = [] if plugins    is None else plugins
        self.implements = [] if implements is None else implements
        self.superTypes = [] if superTypes is None else superTypes
        self.storyIds   = [] if storyIds   is None else storyIds

    # Returns a pandas DataFrame that describes a list of CodeFragment instances. Each instance is a dedicated
    # column in the dataframe, labelled with its fragmentId; the 'Property' column names the rows.
    #
    # -codeRepo: object exposing findFragment(fragmentId), which returns a CodeFragment or None.
    # -fragmentIds: iterable of fragmentIds to describe. Ids that the repo does not know are skipped.
    #
    # Declared as a static method because it takes no instance: CodeFragment.describe(repo, ids) keeps working and
    # calling it through an instance no longer misbinds the arguments.
    @staticmethod
    def describe(codeRepo, fragmentIds):
        desc_dict = {}
        desc_dict['Property'] = ['Parameters', 'Output', 'Loc', 'Calls', 'Plugins', 'Implements',
                                 'Super Types', 'Story Ids']
        for frgId in fragmentIds:
            frg = codeRepo.findFragment(frgId)
            if frg is None:
                continue
            # Same order as the 'Property' labels above
            description = [frg.parameters, frg.output, frg.loc, frg.calls, frg.plugins, frg.implements,
                           frg.superTypes, frg.storyIds]
            desc_dict[frg.fragmentId] = description

        return pd.DataFrame(desc_dict)
        

In [8]:
class CodeRepo:
    # In-memory registry of Interface and CodeFragment instances. Ids for both kinds of objects are drawn from the
    # same counter.
    def __init__(self):
        self.idCounter  = IdCounter()
        self.fragments  = []
        self.interfaces = []

    # Returns the registered interface whose interfaceId matches, or None if there is none.
    def findInterface(self, interfaceId):
        matches = (candidate for candidate in self.interfaces if candidate.interfaceId == interfaceId)
        return next(matches, None)

    # Assigns a new "Interface-<n>" id to the interface, registers it and returns the id.
    def addInterface(self, interface):
        interface.interfaceId = "Interface-{}".format(str(self.idCounter.next_id()))
        self.idCounter.increment()
        self.interfaces += [interface]
        return interface.interfaceId

    # Returns the registered fragment whose fragmentId matches, or None if there is none.
    def findFragment(self, fragmentId):
        matches = (candidate for candidate in self.fragments if candidate.fragmentId == fragmentId)
        return next(matches, None)

    # Assigns a new "Fragment-<n>" id to the fragment, registers it and returns the id.
    def addFragment(self, fragment):
        fragment.fragmentId = "Fragment-{}".format(str(self.idCounter.next_id()))
        self.idCounter.increment()
        self.fragments += [fragment]
        return fragment.fragmentId

In [9]:
class CodeEvolutionEngine:
    # Applies simulated refactorings to the CodeFragments stored in the engine's own CodeRepo.
    #
    # -refactoringModel: object whose 'random' attribute provides pickHowLong(n) and pickSubsetIndices(items), which
    #  are used to decide how much code and which parameters/calls are moved by a refactoring.
    def __init__(self, refactoringModel):
        self.refactoringModel = refactoringModel
        self.codeRepo = CodeRepo()

    # Returns the items of sequences[0] that are present in every one of the sequences, without duplicates and in
    # the order in which they first appear in sequences[0]. Returns [] when no sequences are given.
    # Items must be hashable.
    @staticmethod
    def _commonItems(sequences):
        if not sequences:
            return []
        shared = set(sequences[0]).intersection(*sequences[1:])
        return [item for item in dict.fromkeys(sequences[0]) if item in shared]

    # Modifies a set of CodeFragments by moving some common code out of each fragment and putting it into a new
    # CodeFragment, which is then returned. The original CodeFragments are mutated to call the new CodeFragment and
    # to reduce the number of lines of code each original CodeFragment has.
    #
    # -fragmentIds: list of fragmentIds, as assigned by this engine's CodeRepo. An id listed more than once is
    #  processed only once.
    #
    # Returns the new CodeFragment (already added to the repo), or None when there is nothing to refactor: either
    # no fragment ids were given or the smallest fragment has fewer than 2 lines of code.
    # Raises AssertionError if an id is not known to the repo.
    def factorOutSubroutine(self, fragmentIds):
        # A repeated id would otherwise have its lines of code reduced once per repetition
        unique_ids = list(dict.fromkeys(fragmentIds))
        if not unique_ids: # nothing to intersect and no minimum to compute
            return None

        fragments = []
        for frgId in unique_ids:
            frg = self.codeRepo.findFragment(frgId)
            # Something is corrupted if repo has no fragment for id that only repo can generate
            assert frg is not None, "No fragment found for id " + str(frgId)
            fragments.append(frg)

        loc_min = min(frg.loc for frg in fragments)
        if loc_min < 2: # there is nothing to refactor in this case
            return None

        # Candidates are kept in the order of the first fragment rather than in set order. Set order for strings
        # differs from one interpreter run to the next, which would make the index-based random selection below
        # irreproducible even with a deterministic source of randomness.
        common_params = self._commonItems([frg.parameters for frg in fragments])
        common_calls  = self._commonItems([frg.calls for frg in fragments])
        all_storyIds  = list(dict.fromkeys(storyId for frg in fragments for storyId in frg.storyIds))

        # Randomly select metadata for new fragment
        rnd               = self.refactoringModel.random
        new_loc           = rnd.pickHowLong(loc_min)
        new_params_idxs   = rnd.pickSubsetIndices(common_params)
        new_calls_idxs    = rnd.pickSubsetIndices(common_calls)

        # Create new fragment
        new_params = [common_params[idx] for idx in new_params_idxs]
        new_calls  = [common_calls[idx] for idx in new_calls_idxs]

        # @TODO - set new_output to something more interesting than 'None'
        new_output = None

        new_frg = CodeFragment(new_params, new_output, new_loc, new_calls, storyIds = all_storyIds)
        self.codeRepo.addFragment(new_frg)

        # Remove refactored code from original fragments. This means fewer lines of code and fewer external calls
        # *but* not fewer parameters (original fragments still need to be passed those parameters since
        # they either still use them or need to pass them to the new fragment)
        #
        # Also add a call from the original fragments to the new fragment
        moved_calls = list(new_calls)
        for frg in fragments:
            frg.loc -= new_loc
            # Work on a copy and rebind: the list held by the fragment may be shared with the caller or with
            # other fragments, and must not be changed behind their back.
            remaining_calls = list(frg.calls)
            for call in moved_calls:
                if call in remaining_calls:
                    remaining_calls.remove(call)
            remaining_calls.append(new_frg.fragmentId)
            frg.calls = remaining_calls

        return new_frg

    # Modifies a set of CodeFragments by moving some common code out of each fragment and putting it into a new
    # CodeFragment, which is then returned. The new CodeFragment becomes the 'master control', so each of the original
    # CodeFragments becomes a mere 'plugin', i.e.: original CodeFragments are made smaller by removing all lines of
    # code that were moved to the common control, and each diminished CodeFragment is made to implement a new interface
    # that the new control CodeFragment calls. Thus, the original CodeFragments become concrete classes of a new
    # hierarchy which is invoked polymorphically by the newly created CodeFragment.
    #
    # Not implemented yet: currently always returns None.
    def factorOutPolymorphically(self, fragmentIds):
        # TODO
        return None
        

In [29]:
class DomainAtlas:

    # Atlas will be a tree, where nodes are identified by the partial branch that leads to them from the root.
    # The depth of the tree is nb_levels, and each non-leaf node can branch in up to nb_cases-many ways.
    # Thus, each node can be identified as a list [c1, c2, c3, .., cn] where ci is between 0 and nb_cases-1, and
    # n <= nb_levels. Thus, for a node N = [c1, ..., cn] there are potentially nb_cases children, each of them of
    # the form CHILD = [c1, ..., cn, x] where x is a number between 0 and nb_cases-1. However, this is only the
    # potential maximum because based on a random algorithm not all possible values of x will be allowed. This is
    # to simulate the fact that different sub-domains may have overlapping but different sub-categories.
    # Notice that the data structure can be quite large - for 3 levels and 10 cases, there potentially might be
    # 10*10*10 = 1000 leaves in the tree, were it not for the probabilistic pruning.
    #
    # -model: object whose 'random' attribute provides pickSubset(items), used to choose which cases each node gets.
    # -nb_levels: integer, number of levels to build below the root. Zero or a negative number yields just the root.
    # -nb_cases: integer, maximum number of children per node.
    #
    # The tree itself is only created by buildAtlas(); until then self.atlas is None.
    def __init__(self, model, nb_levels, nb_cases):
        self.model     = model
        self.nb_levels = nb_levels
        self.nb_cases  = nb_cases
        self.atlas     = None

    # (Re)builds the tree from scratch and stores its root node, named 'Root', in self.atlas.
    def buildAtlas(self):
        self.atlas = Node('Root')
        self._buildChildren(self.atlas, self.nb_levels)

    # Recursively attaches to 'parent' one child per case selected by the model, going 'levels_remaining' levels
    # deep. First-level children are named after their case ('2'); deeper nodes append their case to the parent's
    # name ('2-3', '2-3-0', ...).
    def _buildChildren(self, parent, levels_remaining):
        # '<=' rather than '==': with a negative nb_levels the counter never hits exactly zero and the recursion
        # would not terminate.
        if levels_remaining <= 0:
            return
        cases = list(range(self.nb_cases))
        valid_cases = self.model.random.pickSubset(cases)
        prefix = ''
        if (levels_remaining < self.nb_levels):
            # Not directly under the root, so qualify the child with the path that leads to it
            prefix = parent.name + '-'
        for case in valid_cases:
            child = Node(prefix + str(case), parent=parent)
            self._buildChildren(child, levels_remaining - 1)

In [30]:
# Build a sample atlas with nb_levels=4 and nb_cases=5. The names defined here (model, da, atlas) are used by
# the cells that follow.
model = RefactoringModel()
da = DomainAtlas(model, 4, 5)
da.buildAtlas()  # creates the tree and stores its root in da.atlas
atlas = da.atlas

In [31]:
# Show the atlas as a text tree: one line per node, made of the drawing prefix followed by the node's name.
for pre, fill, node in RenderTree(atlas):
    print("{}{}".format(pre, node.name))

Root
└── 2
    ├── 2-3
    │   ├── 2-3-3
    │   │   └── 2-3-3-2
    │   └── 2-3-0
    │       ├── 2-3-0-4
    │       ├── 2-3-0-3
    │       ├── 2-3-0-0
    │       └── 2-3-0-2
    ├── 2-1
    │   ├── 2-1-2
    │   │   ├── 2-1-2-0
    │   │   ├── 2-1-2-4
    │   │   └── 2-1-2-1
    │   ├── 2-1-3
    │   │   ├── 2-1-3-3
    │   │   ├── 2-1-3-2
    │   │   ├── 2-1-3-1
    │   │   └── 2-1-3-4
    │   └── 2-1-1
    │       └── 2-1-1-3
    ├── 2-4
    │   ├── 2-4-2
    │   ├── 2-4-3
    │   │   └── 2-4-3-0
    │   └── 2-4-4
    │       ├── 2-4-4-0
    │       ├── 2-4-4-1
    │       └── 2-4-4-2
    └── 2-2
        └── 2-2-2


In [32]:
# Write the atlas to 'tree.dot', relative to the current working directory.
# Earlier experiments called DotExporter(atlas).to_picture(<path to a .png>) with machine-specific absolute
# paths to produce an image instead - presumably that needs a local Graphviz installation; confirm before
# re-enabling, and prefer a relative path.
DotExporter(atlas).to_dotfile("./tree.dot")

In [122]:
# DomainAtlas defines neither __repr__ nor __str__, so this only shows the default object representation.
print(da)

<__main__.DomainAtlas object at 0x000001E2CE4A7160>


In [None]:
# Scratch check: a copy of a list can grow without affecting the list it was copied from.
a = [3, 4]
b = list(a)
b.append(5)
a, b

In [None]:
# Demo set-up: a fresh engine (with its own empty CodeRepo) and two sample fragments.
# The two fragments have parameters 'p-a' and 'p-f' in common, no calls, and 1300 and 1000 lines of code.
# Note that 'model' rebinds the name already used by the atlas cells above.
model = RefactoringModel()
engine = CodeEvolutionEngine(model)
frg1 = CodeFragment(['p-a', 'p-b', 'p-c', 'p-f'], None, 1300, [], storyIds = ['Story #5'])
frg2 = CodeFragment(['p-a', 'p-d', 'p-e', 'p-f', 'p-g'], None, 1000, [], storyIds = ['Story #74'])

In [None]:
# Register both fragments. addFragment assigns each one its fragmentId ('Fragment-<n>') and returns it, so the
# cell displays the id given to frg2.
engine.codeRepo.addFragment(frg1)
engine.codeRepo.addFragment(frg2)

In [None]:
# Factor common code out of frg1 and frg2 into a new fragment. Both originals are modified in place (fewer lines
# of code, plus a call to the new fragment). The result would be None if the smaller fragment had fewer than 2
# lines of code, which is not the case here.
frg3 = engine.factorOutSubroutine([frg1.fragmentId, frg2.fragmentId])

In [None]:
# Tabulate the two refactored fragments next to the new one: one DataFrame column per fragment, one row per
# property.
CodeFragment.describe(engine.codeRepo, [frg1.fragmentId, frg2.fragmentId, frg3.fragmentId])