Mine the patterns about coarse-gained and fine-grained configutation dependencies from succesfully executed serverless applications

In [11]:
import yaml
import os
from collections import Counter
from GeneralMethod import uniRepAll, transforRepPattern
import itertools

In [12]:

# implement FP-Growth
# part 1 : construct node classes
class treeNode:

    def __init__(self, nameValue, numOccur, parentNode):
        self.name = nameValue 
        self.count = numOccur 
        self.nodeLink = None 
        self.parent = parentNode 
        self.children = {} 
    
    def inc(self, numOccur):

        self.count += numOccur

    def disp(self, ind=1):
        for child in self.children.values():
            child.disp(ind + 1) 


# part 2 : Raw data creation and processing
from collections import OrderedDict

def createInitSet(dataSet):

    retDict=OrderedDict() # retDict = {}
    for trans in dataSet:
       
        retDict[frozenset(trans)] = 1
    return retDict

# part 3 : create FP tree
def createTree(dataSet,  minSup=1):

    headerTable = {}  
    for trans in dataSet:  
        for item in trans:     
            headerTable[item] = headerTable.get(item, 0) + dataSet[trans]
    for k in list(headerTable.keys()): 
        if headerTable[k] < minSup:
            del (headerTable[k]) 
   
    # start to construct FP tree
    freqItemSet = set(headerTable.keys())  

    if len(freqItemSet) == 0:
        return None, None

    for k in headerTable: 
        headerTable[k] = [headerTable[k], None] 
    retTree = treeNode('Null Set', 1, None) 
    
    for tranSet, count in dataSet.items(): 
       
        localD = {} 
        for item in tranSet:
            if item in freqItemSet:
                
                localD[item] = headerTable[item][0]
       
        if len(localD) > 0: 
            orderedItems = [v[0] for v in sorted(localD.items(), key=lambda p: p[1], reverse=True)]

            updateTree(orderedItems, retTree, headerTable, count)
    return retTree, headerTable

    
def updateTree(items, inTree, headerTable, count):

    if items[0] in inTree.children:
        
        inTree.children[items[0]].inc(count)
    else:
       
        inTree.children[items[0]] = treeNode(items[0], count, inTree)
       
        if headerTable[items[0]][1] is None: 
            headerTable[items[0]][1] = inTree.children[items[0]] 
        else:
            
            updateHeader(headerTable[items[0]][1], inTree.children[items[0]])
    if len(items) > 1:
        
        updateTree(items[1::], inTree.children[items[0]], headerTable, count)



def updateHeader(nodeToTest, targetNode):

    while nodeToTest.nodeLink is not None:
        nodeToTest = nodeToTest.nodeLink
    nodeToTest.nodeLink = targetNode



# part 4 ：mine frequent item sets
def ascendTree(leafNode, prefixPath):

    if leafNode.parent is not None:
        prefixPath.append(leafNode.name)
        ascendTree(leafNode.parent, prefixPath)


def findPrefixPath(basePat, treeNode):

    condPats = {}
    while treeNode is not None: 
        prefixPath = []
        ascendTree(treeNode, prefixPath)
        if len(prefixPath) > 1: 
           
            condPats[frozenset(prefixPath[1:])] = treeNode.count
        
        treeNode = treeNode.nodeLink
        
    return condPats


# part 5 : Recursive search for frequent item sets
def mineTree(inTree, headerTable, minSup, preFix, freqItemList):

    bigL = [v[0] for v in sorted(headerTable.items(), key=lambda p: str(p[1]))]

    for basePat in bigL:
        newFreqSet = preFix.copy() 
        newFreqSet.add(basePat)
        
        freqItemList.append(newFreqSet)
        

        condPathBases = findPrefixPath(basePat, headerTable[basePat][1])
        myCondTree, myHead = createTree(condPathBases, minSup)
        
        if myHead is not None:
           
            mineTree(myCondTree, myHead, minSup, newFreqSet, freqItemList)

In [13]:
# mine coarse-gained configuration dependencies
def mainAllRuleSAMCorse(directoryPath):

    simpData = loadSimpDatSAMCorse(directoryPath) 

    initSet = createInitSet(simpData)
    minSup = 0.05 * len(simpData)
    print(minSup)

    myFPtree, myHeaderTab = createTree(initSet, minSup)
    myFPtree.disp()
    freqItemList = []
    mineTree(myFPtree, myHeaderTab, minSup, set([]), freqItemList)
    newfreqItemList = []
    for item in freqItemList:
        if ("Transform" not in item) and ("AWSTemplateFormatVersion" not in item) and ("Description" not in item):
            flag = 0
            for field_i in item:
                rootname = field_i.split(".")[0]
                if rootname != "Outputs" and rootname != "Parameters" and "Description" not in field_i:
                    flag = flag + 0
                else:
                    flag = flag + 1
            if flag==0:
                newfreqItemList.append(item)

    # print(len(freqItemList))
    # print(len(newfreqItemList))


    
    left,right = handlefreqItemListSAMCorse(newfreqItemList, simpData)

    return left, right


def handlefreqItemListSAMCorse(freqItemList, simpData):
    saveleft = []
    saveright = []

    for tmp in freqItemList:
       
        combinations = generate_combinations(tmp)
        
        for i, (left, right) in enumerate(combinations, 1):
            
            left = list(left)
            right = list(right)
            
            tolen = len(left) + len(right)
            if tolen >1 and ('Transform' not in left) and ('Transform' not in right) and ('AWSTemplateFormatVersion' not in left) and ('AWSTemplateFormatVersion' not in right):
                flag = 0
                for data_i in simpData:
                    left_tmp = set(left)
                    data_i_tmp = set(data_i)
                    if left_tmp.issubset(data_i_tmp):
                        right_tmp = set(right)
                        if right_tmp.issubset(data_i_tmp):
                            flag = flag + 0  
                        else:
                            flag = flag + 1
                if flag==0:
                    saveleft.append(left)
                    saveright.append(right)
        
    saveleft_tmp = saveleft
    saveright_tmp = saveright

    filter_saveleft = []
    filter_saveright = []

    for index, value in enumerate(saveleft):
        tag = 0
        for index1, value1 in enumerate(saveleft_tmp):
           
            if index!=index1 and set(value) == set(value1):
                rightvalue = set(saveright[index])
                rightvalue1 = set(saveright_tmp[index1])
                if rightvalue.issubset(rightvalue1):
                    
                    tag = tag +1
                    # break
                else:
                    tag = tag + 0
        # print(tag)
        if tag == 0:
            newtag = 0
            for f in filter_saveleft:
                if set(saveleft[index])==set(f):
                   newtag = newtag + 1
                else:
                   newtag = newtag + 0
            if newtag == 0:               
                filter_saveleft.append(saveleft[index]) 
                filter_saveright.append(saveright[index])
            
    rootContentleft = "correlationleftCoarse"
    rootContentright = "correlationrightCoarse"
    f_left = open("Patterns/sam_{}.txt".format(rootContentleft), "w")
    f_right = open("Patterns/sam_{}.txt".format(rootContentright), "w")
    
    for i in range(len(filter_saveleft)):
        
        text_left = "+++".join(filter_saveleft[i])
        text_right = "+++".join(filter_saveright[i])
        
        f_left.write(text_left)
        f_left.write("\n")

        f_right.write(text_right)
        f_right.write("\n")
        
    f_left.close()
    f_right.close()

    print(len(filter_saveleft))

    return filter_saveleft, filter_saveright
 


# data processing 
def loadSimpDatSAMCorse(directoryPath):

    
    simpData = addEntryValueRepSAMCorse(directoryPath)
   
    return simpData

def addEntryValueRepSAMCorse(directoryPath):
    total_flat_config = uniRepAll(directoryPath)
    entryAll = []
    for file_i in total_flat_config:
        entryfile = []
        flat_config_key_new, flat_config_value_new = transforRepPattern(file_i)
        for index in range(len(flat_config_key_new)):
            tmp = "{}".format(flat_config_key_new[index])
            entryfile.append(tmp)
        entryfile = list(set(entryfile))
        entryAll.append(entryfile)
    return entryAll

def generate_combinations(elements):
    all_combinations = []
    for r in range(1, len(elements)):
        left_combinations = itertools.combinations(elements, r)
        for left in left_combinations:
            right = tuple(sorted(e for e in elements if e not in left))
            if right not in [x[1] for x in all_combinations]:
                all_combinations.append((left, right))
    return all_combinations


In [14]:
# mine fine-grained configuration dependencies
def mainAllRuleSAMFine(directoryPath):
    
    simpData = loadSimpDatSAM(directoryPath) 
    

    initSet = createInitSet(simpData)
    minSup = 0.02 * len(simpData)
    
    print(minSup)

    myFPtree, myHeaderTab = createTree(initSet, minSup)
    myFPtree.disp()
    freqItemList = []
    mineTree(myFPtree, myHeaderTab, minSup, set([]), freqItemList)
    
   

    newfreqItemList = []
    for item in freqItemList:
    
        flag = 0
        for field_i in item:
            tmp = field_i.split("=")
            rootname = tmp[0].split(".")[0]
            if rootname != "Outputs" and rootname != "Parameters" and tmp[0] != "Transform" and tmp[0] != "AWSTemplateFormatVersion" and tmp[0] != "Description" and "Description" not in tmp[0]:
                flag = flag + 0
            else:
                flag = flag + 1
        if flag==0:
            newfreqItemList.append(item)
    
    # print(len(freqItemList))
    # print(len(newfreqItemList))

    
    left,right = handlefreqItemListSAM(newfreqItemList, simpData)

    return left, right
    



def loadSimpDatSAM(directoryPath):
    simpData = addEntryValueRepSAM(directoryPath)
    return simpData


def addEntryValueRepSAM(directoryPath):
    total_flat_config = uniRepAll(directoryPath)
    entryAll = []
    for file_i in total_flat_config:
        entryfile = []
        flat_config_key_new, flat_config_value_new = transforRepPattern(file_i)
        for index in range(len(flat_config_key_new)):
            tmp = "{}={}".format(flat_config_key_new[index], flat_config_value_new[index])
            entryfile.append(tmp)
        entryfile = list(set(entryfile))
        entryAll.append(entryfile)
    return entryAll




def handlefreqItemListSAM(freqItemList, simpData):
    saveleft = []
    saveright = []

    for tmp in freqItemList:
       
        combinations = generate_combinations(tmp)
      
        for i, (left, right) in enumerate(combinations, 1):
            
            left = list(left)
            right = list(right)
            
            tolen = len(left) + len(right)
            if tolen >1 and ('Transform=AWS::Serverless-2016-10-31' not in left) and ('Transform=AWS::Serverless-2016-10-31' not in right) and ('AWSTemplateFormatVersion=2010-09-09' not in left) and ('AWSTemplateFormatVersion=2010-09-09' not in right):
                flag = 0
                for data_i in simpData:
                    left_tmp = set(left)
                    data_i_tmp = set(data_i)
                    if left_tmp.issubset(data_i_tmp):
                        right_tmp = set(right)
                        if right_tmp.issubset(data_i_tmp):
                            flag = flag + 0  
                        else:
                            flag = flag + 1
                if flag==0:
                    saveleft.append(left)
                    saveright.append(right)

    saveleft_tmp = saveleft
    saveright_tmp = saveright

    filter_saveleft = []
    filter_saveright = []

    for index, value in enumerate(saveleft):
        tag = 0
        for index1, value1 in enumerate(saveleft_tmp):
            
            if index!=index1 and set(value) == set(value1):
                rightvalue = set(saveright[index])
                rightvalue1 = set(saveright_tmp[index1])
                if rightvalue.issubset(rightvalue1):
                   
                    tag = tag +1
                    # break
                else:
                    tag = tag + 0
        # print(tag)
        if tag == 0:
            newtag = 0
            for f in filter_saveleft:
                if set(saveleft[index])==set(f):
                   newtag = newtag + 1
                else:
                   newtag = newtag + 0
            if newtag == 0:               
                filter_saveleft.append(saveleft[index]) 
                filter_saveright.append(saveright[index])


            
    
    rootContentleft = "correlationleftFine"
    rootContentright = "correlationrightFine"
    f_left = open("Patterns/sam_{}.txt".format(rootContentleft), "w")
    f_right = open("Patterns/sam_{}.txt".format(rootContentright), "w")
    
    for i in range(len(filter_saveleft)):
        
        text_left = "+++".join(filter_saveleft[i])
        text_right = "+++".join(filter_saveright[i])
        
        f_left.write(text_left)
        f_left.write("\n")

        f_right.write(text_right)
        f_right.write("\n")
        
    f_left.close()
    f_right.close()

    print(len(filter_saveleft))

    return filter_saveleft, filter_saveright
              



In [15]:
# mine configuration dependencies
directoryPath = "../Dataset/configuration files-real"
# mine coarse-gained configuration dependencies
mainAllRuleSAMCorse(directoryPath)
# mine fine-gained configuration dependencies
mainAllRuleSAMFine(directoryPath)

14.66
317


([['Resources.PHAWS::Serverless::Function.Properties.CodeUri=.'],
  ['Resources.PHAWS::Serverless::Function.Properties.Runtime=nodejs12.x',
   'Resources.PHAWS::Serverless::Function.Properties.CodeUri=.',
   'Resources.PHAWS::Serverless::Function.Properties.MemorySize=128'],
  ['Resources.PHAWS::Serverless::Function.Properties.Runtime=nodejs12.x',
   'Resources.PHAWS::Serverless::Function.Properties.Handler=index.handler',
   'Resources.PHAWS::Serverless::Function.Properties.CodeUri=.',
   'Resources.PHAWS::Serverless::Function.Properties.MemorySize=128'],
  ['Resources.PHAWS::Serverless::Function.Properties.Runtime=nodejs12.x',
   'Resources.PHAWS::Serverless::Function.Properties.CodeUri=.',
   'Resources.PHAWS::Serverless::Function.Properties.MemorySize=128',
   'Resources.PHAWS::Serverless::Function.Type=AWS::Serverless::Function'],
  ['Resources.PHAWS::Serverless::Function.Properties.Runtime=nodejs12.x',
   'Resources.PHAWS::Serverless::Function.Properties.Handler=index.handler',
 