# Node Risk Vulnerability Index Analysis Experiment

In [13]:
from matplotlib import pyplot as plt

# Global matplotlib configuration for every figure drawn in this notebook:
# select the 'Microsoft YaHei' sans-serif font and turn off the Unicode
# minus glyph so negative tick labels use a plain hyphen.
plt.rcParams.update({
    'font.sans-serif': ['Microsoft YaHei'],
    'axes.unicode_minus': False,
})

# Marker symbols and line colours available to the plotting cells.
shapes = list('ov^<>sp*h+')
colors = 'red blue green orange purple cyan magenta darkgreen chocolate'.split()

## Experimental preparation

### Read network

In [14]:
from experiment.functions import func_get_lab_graph
import os
from experiment.lab2._0_config_ import SoftwareConfigs1, LabSpace

# Per-software lookup tables, keyed by software name, filled from the values
# returned by func_get_lab_graph.
Graphs = dict()
Labels = dict()
Types = dict()

# Output folder for this experiment. exist_ok=True replaces the separate
# isdir() test, which was a check-then-create race.
directory = os.path.join(LabSpace, '.vri')
os.makedirs(directory, exist_ok=True)

for softwareName, (status, language, src_dir) in SoftwareConfigs1.items():
    if not status:
        # Entry is switched off in SoftwareConfigs1.
        continue

    modelDir = os.path.join(LabSpace, softwareName)
    labDir = os.path.join(modelDir, '.lab2')
    os.makedirs(labDir, exist_ok=True)

    # The third return value is not used by this notebook.
    G, labelDict, _, typeDict = func_get_lab_graph(modelDir, labDir, softwareName)
    Graphs[softwareName] = G
    Labels[softwareName] = labelDict
    Types[softwareName] = typeDict
    print(f'{softwareName} Network reading completed')

### Compute the risk vulnerability index (VRI), failure-occurrence capability, degree (total, in, out), node weight, and the baseline complexity metrics used for comparison, for every function node

In [15]:
from experiment.functions import func_SortDictByValue

# Result tables, one per metric: software name -> value-sorted {node: value}
# as produced by func_SortDictByValue. ARs and ARIs are created here but are
# not filled by this cell.
AFOs, AFPs, ARs, ARIs = {}, {}, {}, {}
VRIs, DRIs = {}, {}
LOCs, McCabes, HalsteatHs, HalsteatVs, SSCCs = {}, {}, {}, {}, {}

Degrees, InDegrees, OutDegrees, NodeWeights = {}, {}, {}, {}

# (result table, node-attribute key) pairs copied straight from the node data.
attributeTables = (
    (AFOs, 'AFO'),
    (AFPs, 'AFP'),
    (VRIs, 'VRI'),
    (DRIs, 'DRI'),
    (LOCs, 'LOC'),
    (McCabes, 'McCabe'),
    (SSCCs, 'sscc'),
    (HalsteatHs, 'H'),
    (HalsteatVs, 'V'),
)

for softwareName, (status, language, src_dir) in SoftwareConfigs1.items():
    if not status:
        continue

    G = Graphs[softwareName]
    types = Types[softwareName]

    # With the flag off, only nodes typed 'Method' are collected.
    hasVariable = False
    selected = [
        (n, data)
        for n, data in G.nodes(data=True)
        if hasVariable or types[n] == 'Method'
    ]

    for table, key in attributeTables:
        table[softwareName] = func_SortDictByValue({n: data[key] for n, data in selected})

    # Structural measures come from the graph rather than the node data.
    Degrees[softwareName] = func_SortDictByValue({n: G.degree(n) for n, _ in selected})
    InDegrees[softwareName] = func_SortDictByValue({n: G.in_degree(n) for n, _ in selected})
    OutDegrees[softwareName] = func_SortDictByValue({n: G.out_degree(n) for n, _ in selected})
    NodeWeights[softwareName] = func_SortDictByValue({n: data['weight'] for n, data in selected})

print('Extraction of Node Risk Vulnerability Index and Node Complexity Index')

## Experiment 2: Effectiveness of Identifying Risk-Vulnerable Nodes

### Set random attack experiment parameters

In [16]:
# Number of random-attack simulations run per software; the accumulated hit
# counts are divided by this value to obtain per-round averages.
Rounds = 100

# NOTE(review): presumably the fraction of nodes to fail per attack, but the
# simulation cell sizes its seed set with a hard-coded `int(N * 0.05)` and
# never reads this value — confirm which rate is intended.
ErrNodeRate = 0.01


### Conduct random attack experiments

In [17]:
# Names of software systems under study.
# NOTE(review): not referenced by the other cells visible in this notebook,
# which iterate SoftwareConfigs1 instead — confirm whether it is still needed.
Softwares = [
    'Nginx-1.22.1',
    'Redis-7.2',
    'Node.js-20.5.0',
    'VLC-3.0.17'
]


In [18]:
from ccft.util.utils import serialize
import pandas as pd
from experiment.functions import func_get_edge_influence
from experiment.algorithms.propagation_models.sir_epidemic import sir_epidemic
import random
from experiment.algorithms.propagation_models.independent_cascade import independent_cascade
from experiment.algorithms.propagation_models.linear_threshold import linear_threshold

# Random-attack experiment.
#
# For every enabled software, fail a random 5% of the graph nodes `Rounds`
# times, propagate the failure with sir_epidemic, and count how many of the
# affected nodes fall inside the top-k% of each node ranking (VRI, LOC,
# McCabe, SSCC, Halstead H, Halstead V). The per-round averages end up in
# df50 ... df5 (index: software name, columns: metricColumns) and the raw
# affected-node results in `results[softwareName][round]`.

# Column name of each compared ranking in the result frames.
metricColumns = ['vri', 'loc', 'mc', 'sscc', 'h', 'v']

# Top-k cut-offs: percent label -> fraction of the graph's node count. The
# fractions are float literals on purpose so that int(N * fraction)
# truncates exactly as before.
topFractions = {
    50: 0.5, 45: 0.45, 40: 0.4, 35: 0.35, 30: 0.3,
    25: 0.25, 20: 0.2, 15: 0.15, 10: 0.1, 5: 0.05,
}

results = dict()

# averageRows[percent] gathers one row per software: [name, vri, loc, ...].
averageRows = {percent: [] for percent in topFractions}

for softwareName, (status, language, src_dir) in SoftwareConfigs1.items():
    if not status:
        continue

    G = Graphs[softwareName]
    results[softwareName] = dict()

    N = G.number_of_nodes()
    # NOTE(review): the seed-set size is hard-coded to 5% of the nodes;
    # ErrNodeRate from the parameter cell is not used — confirm intent.
    M = int(N * 0.05)
    nodes = list(G.nodes)

    # Node orderings per metric, in the order stored by the extraction cell.
    rankings = {
        'vri': list(VRIs[softwareName].keys()),
        'loc': list(LOCs[softwareName].keys()),
        'mc': list(McCabes[softwareName].keys()),
        'sscc': list(SSCCs[softwareName].keys()),
        'h': list(HalsteatHs[softwareName].keys()),
        'v': list(HalsteatVs[softwareName].keys()),
    }

    # topSets[percent][metric] -> set holding the first k ranked nodes.
    # k is derived from N (all graph nodes), as in the original analysis.
    topSets = {
        percent: {
            metric: set(ranked[:int(N * fraction)])
            for metric, ranked in rankings.items()
        }
        for percent, fraction in topFractions.items()
    }

    # Plain integer accumulators; avoids thousands of slow DataFrame.loc
    # scalar updates inside the simulation loop.
    hitTotals = {percent: dict.fromkeys(metricColumns, 0) for percent in topFractions}

    # Inputs for the alternative propagation models that are disabled below.
    # NOTE(review): unused while those models stay disabled; kept because
    # func_get_edge_influence's side effects are not visible from here.
    wiInfluences = func_get_edge_influence(G, 'w-in-degree')
    iInfluences = func_get_edge_influence(G, 'in-degree')
    thresholds = dict.fromkeys(G.nodes, 0.5)

    print(f'{softwareName} randomly failed {M} nodes')
    print(f'Execute {Rounds} random attacks')
    for r in range(Rounds):
        errNodes = random.sample(nodes, M)

        # Alternative models (disabled):
        # ltNodes, ltRoundCounts, ltRoundNodes = linear_threshold(G, seeds=errNodes, influences=iInfluences, thresholds=thresholds)
        # icNodes, icRoundCounts, icRoundNodes = independent_cascade(G, seeds=errNodes, influences=wiInfluences)
        siNodes, siRoundCounts, siRoundNodes = sir_epidemic(G, seeds=errNodes, beta=0.1, gamma=0.0)
        # Store the model's return value untouched; the set is only for counting.
        results[softwareName][r] = siNodes

        affected = set(siNodes)
        for percent, metricSets in topSets.items():
            totals = hitTotals[percent]
            for metric, members in metricSets.items():
                totals[metric] += len(members & affected)

    # Average over the rounds and queue one row per cut-off for this software.
    for percent, totals in hitTotals.items():
        averageRows[percent].append(
            [softwareName] + [totals[metric] / Rounds for metric in metricColumns]
        )

# One frame per cut-off, indexed by software name.
frames = {
    percent: pd.DataFrame(rows, columns=['name'] + metricColumns).set_index('name')
    for percent, rows in averageRows.items()
}

# Names used by the plotting and saving cells.
df50, df45, df40, df35, df30 = (frames[p] for p in (50, 45, 40, 35, 30))
df25, df20, df15, df10, df5 = (frames[p] for p in (25, 20, 15, 10, 5))



In [19]:
import shutil

# Output folder for the per-software curve CSVs written by the plotting cell.
# WARNING: an existing folder is deleted recursively before being recreated,
# so results from a previous run are discarded.
saveDir = os.path.join(LabSpace, 'graph', 'VRI')
if os.path.isdir(saveDir):
    shutil.rmtree(saveDir)
os.makedirs(saveDir)

In [20]:
# X axis: share of top-ranked nodes considered; '0' is the origin point.
X = ['0', '5%', '10%', '15%', '20%', '25%', '30%', '35%', '40%','45%', '50%']
# Sub-figure letters, one per panel of the 3x3 grid.
labs = list('abcdefghi')
vriY = []

# Result frames ordered to line up with X[1:].
framesByRate = [df5, df10, df15, df20, df25, df30, df35, df40, df45, df50]

idx = 0
fig, axs = plt.subplots(3, 3, figsize=(18, 18))
plt.subplots_adjust(hspace=0.25, wspace=0.23)

for softwareName, (status, language, src_dir) in SoftwareConfigs1.items():
    if not status:
        continue

    # One curve per ranking: 0 at the origin, then the averaged hit count
    # at each cut-off.
    vriY, locY, mcY, ssccY, hY, vY = (
        [0] + [frame.loc[softwareName, column] for frame in framesByRate]
        for column in ('vri', 'loc', 'mc', 'sscc', 'h', 'v')
    )
    curves = {'VRI': vriY, 'LOC': locY, 'McCabe': mcY, 'SSCC': ssccY, 'H': hY, 'V': vY}

    # Panels are filled row by row.
    ax = axs[divmod(idx, 3)]
    for curveLabel, ys in curves.items():
        ax.plot(X, ys, label=curveLabel)
    ax.legend(loc='best')

    ax.set_ylabel('Number of affected individuals', fontdict={'size': 12})
    ax.set_title(f'{labs[idx]}) {softwareName}', y=-0.15, fontdict={'family': 'Times New Roman', 'size': 18})

    # Persist the plotted series next to the figure data.
    df = pd.DataFrame({'rate': X, **curves})
    df.to_csv(os.path.join(saveDir, f'{softwareName}.csv'))

    idx += 1
    

In [21]:
# Persist the raw simulation results and the averaged hit-count table of
# every top-k cut-off. Previously only the 50/40/30/15/10/5 tables were
# written although all ten are computed and plotted; the 45/35/25/20 tables
# are now saved as well, under the same 'si-<percent>.csv' naming.
vriDir = os.path.join(LabSpace, '.vri')
serialize(os.path.join(vriDir, 'sim-results.bin'), results)

for percent, frame in (
    (50, df50), (45, df45), (40, df40), (35, df35), (30, df30),
    (25, df25), (20, df20), (15, df15), (10, df10), (5, df5),
):
    frame.to_csv(os.path.join(vriDir, f'si-{percent}.csv'))
