In [11]:
# !pip install pgmpy

# Importing Dependencies

In [12]:
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination
import matplotlib.pyplot as plt
import networkx as nx

# Reading the Data

In [13]:
asset_vulnerability_df = pd.read_csv(r'.\dataset\asset_vulnerability.csv')
attack_vulnerability_df=pd.read_csv(r".\dataset\attack_vulnerability.csv")
threat_actor_df=pd.read_csv(r".\dataset\threat_actor_asset.csv")
prior_attack_df=pd.read_csv(r".\dataset\prior_attack_success.csv")


In [14]:
print(asset_vulnerability_df.shape)
print(attack_vulnerability_df.shape)
print(threat_actor_df.shape)
print(prior_attack_df.shape)

(25, 4)
(25, 3)
(25, 3)
(25, 3)


# Defining Bayesian Network Structure

In [16]:
assets = asset_vulnerability_df['Asset'].unique().tolist()

In [17]:
# Define the Bayesian Network structure
model = BayesianNetwork([('Threat_Actor', 'Attack_Vector'),
                         ('Attack_Vector', 'Vulnerability'),
                         ('Vulnerability', 'Asset')])


In [18]:
print(model)

BayesianNetwork with 4 nodes and 3 edges


# Defining Conditional Probability Distributions


# Step 4: Define Conditional Probability Distributions (CPDs)

# 4.1: CPD for Threat Actor based on prior attack success rates

In [44]:
threat_actor_counts = threat_actor_df['Threat_Actor'].value_counts()
total_count = threat_actor_counts.sum()
threat_actor_probs = threat_actor_counts / total_count
threat_actor_probs

Threat_Actor
External Hacker    0.52
Insider            0.48
Name: count, dtype: float64

In [47]:
cpd_threat_actor = TabularCPD(variable='Threat_Actor', 
                              variable_card=len(threat_actor_probs),  
                              values=[[threat_actor_probs.values[0]], 
                                      [threat_actor_probs.values[1]]],
                                state_names={'Threat_Actor':["External Hacker","Insider"]})

In [48]:
print(cpd_threat_actor)

+-------------------------------+------+
| Threat_Actor(External Hacker) | 0.52 |
+-------------------------------+------+
| Threat_Actor(Insider)         | 0.48 |
+-------------------------------+------+


## 4.2: CPD for Attack Vector given Threat Actor
## Calculate conditional probabilities using success rates

In [51]:
attack_vector_probs = prior_attack_df.pivot_table(index='Threat_Actor', columns='Attack_Vector', values='Success_Rate', aggfunc='mean').fillna(0)
attack_vector_probs_normalized = attack_vector_probs.div(attack_vector_probs.sum(axis=1), axis=0).values
attack_vector_probs.T

Threat_Actor,External Hacker,Insider
Attack_Vector,Unnamed: 1_level_1,Unnamed: 2_level_1
Phishing,0.625,0.425
RCE,0.7375,0.575
Remote Code Execution,0.65,0.5
SQL Injection,0.7375,0.44


In [52]:
cpd_attack_vector = TabularCPD(variable='Attack_Vector',
                               variable_card=attack_vector_probs_normalized.shape[1],
                               values=attack_vector_probs_normalized.T.tolist(), 
                               evidence=['Threat_Actor'],
                               evidence_card=[attack_vector_probs_normalized.shape[0]],
                               state_names={'Threat_Actor':["External Hacker","Insider"],
                                            'Attack_Vector': ['Phishing', 'RCE', 'Remote Code Execution', 'SQL Injection']})

In [55]:
print(cpd_attack_vector)

+-----+-----------------------+
| ... | Threat_Actor(Insider) |
+-----+-----------------------+
| ... | 0.2190721649484536    |
+-----+-----------------------+
| ... | 0.2963917525773196    |
+-----+-----------------------+
| ... | 0.2577319587628866    |
+-----+-----------------------+
| ... | 0.22680412371134023   |
+-----+-----------------------+


## 4.3: CPD for Vulnerability given Attack Vector
## Calculate conditional probabilities using success probabilities

In [24]:
attack_vulnerability_df['Vulnerability'].nunique()

13

In [25]:
vulnerability_pivot = attack_vulnerability_df.pivot_table(index='Attack_Vector', columns='Vulnerability', values='Success_Probability', aggfunc='mean').fillna(0)
vulnerability_probs_normalized = vulnerability_pivot.div(vulnerability_pivot.sum(axis=1), axis=0).values

In [26]:
vulnerability_pivot

Vulnerability,CVE-2023-09876,CVE-2023-12345,CVE-2023-23456,CVE-2023-34567,CVE-2023-45678,CVE-2023-54321,CVE-2023-54322,CVE-2023-67890,CVE-2023-67891,CVE-2023-76543,CVE-2023-87653,CVE-2023-87654,CVE-2023-98765
Attack_Vector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Phishing,0.55,0.0,0.0,0.55,0.0,0.6,0.0,0.0,0.0,0.5,0.5,0.0,0.0
RCE,0.0,0.0,0.0,0.6,0.0,0.7,0.75,0.0,0.0,0.65,0.0,0.6,0.65
Remote Code Execution,0.0,0.8,0.0,0.0,0.0,0.0,0.0,0.0,0.65,0.0,0.0,0.0,0.0
SQL Injection,0.8,0.775,0.7,0.0,0.85,0.0,0.85,0.85,0.75,0.7,0.0,0.8,0.75


In [27]:
vulnerability_probs_normalized.shape[1]

13

In [28]:
vulnerability_probs_normalized.T.tolist()

[[0.20370370370370372, 0.0, 0.0, 0.10223642172523963],
 [0.0, 0.0, 0.5517241379310345, 0.09904153354632587],
 [0.0, 0.0, 0.0, 0.08945686900958466],
 [0.20370370370370372, 0.1518987341772152, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.10862619808306709],
 [0.2222222222222222, 0.17721518987341772, 0.0, 0.0],
 [0.0, 0.189873417721519, 0.0, 0.10862619808306709],
 [0.0, 0.0, 0.0, 0.10862619808306709],
 [0.0, 0.0, 0.44827586206896547, 0.09584664536741214],
 [0.18518518518518517, 0.16455696202531647, 0.0, 0.08945686900958466],
 [0.18518518518518517, 0.0, 0.0, 0.0],
 [0.0, 0.1518987341772152, 0.0, 0.10223642172523963],
 [0.0, 0.16455696202531647, 0.0, 0.09584664536741214]]

In [81]:
cpd_vulnerability = TabularCPD(variable='Vulnerability', 
                               variable_card=vulnerability_probs_normalized.shape[1], 
                               values=vulnerability_probs_normalized.T.tolist(), 
                               evidence=['Attack_Vector'],
                               evidence_card=[vulnerability_probs_normalized.shape[0]],
                               state_names={'Vulnerability':['CVE-2023-09876','CVE-2023-12345','CVE-2023-23456','CVE-2023-34567','CVE-2023-45678','CVE-2023-54321','CVE-2023-54322','CVE-2023-67890','CVE-2023-67891','CVE-2023-76543','CVE-2023-87653','CVE-2023-87654','CVE-2023-98765'],
                                            'Attack_Vector': ['Phishing', 'RCE', 'Remote Code Execution', 'SQL Injection']})

In [82]:
print(cpd_vulnerability)

+-------------------------------+-----+------------------------------+
| Attack_Vector                 | ... | Attack_Vector(SQL Injection) |
+-------------------------------+-----+------------------------------+
| Vulnerability(CVE-2023-09876) | ... | 0.10223642172523963          |
+-------------------------------+-----+------------------------------+
| Vulnerability(CVE-2023-12345) | ... | 0.09904153354632587          |
+-------------------------------+-----+------------------------------+
| Vulnerability(CVE-2023-23456) | ... | 0.08945686900958466          |
+-------------------------------+-----+------------------------------+
| Vulnerability(CVE-2023-34567) | ... | 0.0                          |
+-------------------------------+-----+------------------------------+
| Vulnerability(CVE-2023-45678) | ... | 0.10862619808306709          |
+-------------------------------+-----+------------------------------+
| Vulnerability(CVE-2023-54321) | ... | 0.0                          |
+-----

## 4.4: CPD for Asset given Vulnerability
## Calculate conditional probabilities using exploit probabilities

In [31]:
asset_vulnerability_df['Asset'].nunique()

25

In [32]:
asset_pivot = asset_vulnerability_df.pivot_table(index='Vulnerability', columns='Asset', values='Exploit_Probability', aggfunc='mean').fillna(0)
asset_probs_normalized = asset_pivot.div(asset_pivot.sum(axis=1), axis=0).values

In [33]:
cpd_asset = TabularCPD(variable='Asset', 
                       variable_card=asset_probs_normalized.shape[1], 
                       values=asset_probs_normalized.T.tolist(), 
                       evidence=['Vulnerability'],
                       evidence_card=[asset_probs_normalized.shape[0]])

In [34]:
print(cpd_asset)

+---------------+------------------+-----+---------------------+
| Vulnerability | Vulnerability(0) | ... | Vulnerability(12)   |
+---------------+------------------+-----+---------------------+
| Asset(0)      | 0.0              | ... | 0.0                 |
+---------------+------------------+-----+---------------------+
| Asset(1)      | 0.0              | ... | 0.3412322274881517  |
+---------------+------------------+-----+---------------------+
| Asset(2)      | 0.0              | ... | 0.0                 |
+---------------+------------------+-----+---------------------+
| Asset(3)      | 0.0              | ... | 0.0                 |
+---------------+------------------+-----+---------------------+
| Asset(4)      | 0.0              | ... | 0.33649289099526064 |
+---------------+------------------+-----+---------------------+
| Asset(5)      | 0.0              | ... | 0.0                 |
+---------------+------------------+-----+---------------------+
| Asset(6)      | 0.0    

## Add CPDs to the model

In [35]:
model.add_cpds(cpd_threat_actor, cpd_attack_vector, cpd_vulnerability, cpd_asset)

In [36]:
# Verify model correctness
assert model.check_model()

In [37]:
# Perform inference
inference = VariableElimination(model)

In [83]:
threat_actors = [0, 1]
attack_vectors = [0,1,2,3]
vulnerabilities =[0,1,2,3,4,5,6,7,8,9,10,11,12]

In [85]:
# Function to check if a combination is valid
def is_valid_combination(inference, evidence):
    try:
        posterior = inference.query(variables=['Asset'], evidence=evidence)
        return True
    except Exception as e:
        
        return False

# Function to calculate posterior probabilities for all valid combinations
def calculate_posteriors_for_valid_combinations(inference):
    results = []
    
    # Iterate over all combinations of Threat Actor, Attack Vector, and Vulnerability
    for threat_actor in threat_actors:
        for attack_vector in attack_vectors:
            for vulnerability in vulnerabilities:
                # Define evidence based on current combination
                evidence = {'Threat_Actor': threat_actor,
                            'Attack_Vector': attack_vector,
                            'Vulnerability': vulnerability
                            }
                
                
                if is_valid_combination(inference, evidence):
                    try:
                        # Perform inference to get the posterior probability for 'Asset'
                        posterior = inference.query(variables=['Asset'], evidence=evidence)
                        result = {
                            'Threat_Actor': threat_actor,
                            'Attack_Vector': attack_vector,
                            'Vulnerability': vulnerability,
                            'Posterior_Probabilities': posterior
                        }
                        results.append(result)
                    except Exception as e:
                        print(f"Error in inference for {evidence}: {str(e)}")
                else:
                    print(f"Skipping invalid combination: {evidence}")
    
    return results


posterior_results = calculate_posteriors_for_valid_combinations(inference)


for result in posterior_results:
    print(f"Threat Actor: {result['Threat_Actor']}, Attack Vector: {result['Attack_Vector']}, Vulnerability: {result['Vulnerability']}")
    print("Posterior Probability of each asset being at risk:")
    print(result['Posterior_Probabilities'])
    print("\n")


Threat Actor: 0, Attack Vector: 0, Vulnerability: 0
Posterior Probability of each asset being at risk:
+-----------+--------------+
| Asset     |   phi(Asset) |
| Asset(0)  |       0.0000 |
+-----------+--------------+
| Asset(1)  |       0.0000 |
+-----------+--------------+
| Asset(2)  |       0.0000 |
+-----------+--------------+
| Asset(3)  |       0.0000 |
+-----------+--------------+
| Asset(4)  |       0.0000 |
+-----------+--------------+
| Asset(5)  |       0.0000 |
+-----------+--------------+
| Asset(6)  |       0.0000 |
+-----------+--------------+
| Asset(7)  |       0.0000 |
+-----------+--------------+
| Asset(8)  |       0.0000 |
+-----------+--------------+
| Asset(9)  |       0.0000 |
+-----------+--------------+
| Asset(10) |       0.0000 |
+-----------+--------------+
| Asset(11) |       0.0000 |
+-----------+--------------+
| Asset(12) |       0.0000 |
+-----------+--------------+
| Asset(13) |       0.0000 |
+-----------+--------------+
| Asset(14) |       0.0000 

In [86]:
# posterior_asset = inference.query(variables=['Asset'], evidence={'Threat_Actor': 1})

# print("Posterior Probability of each asset being at risk:")
# print(posterior_asset)

In [89]:

def rank_assets_based_on_risk(posterior_results):
    asset_risk_scores = {}

    # Aggregate posterior probabilities for each asset
    for result in posterior_results:
        # Extracting the posterior probabilities for the specific asset
        asset_probabilities = result['Posterior_Probabilities'].values.flatten()  
        
        
        for asset_index, asset_score in enumerate(asset_probabilities):
            asset_name = f"Asset_{asset_index}"  
            
           
            if asset_name not in asset_risk_scores:
                asset_risk_scores[asset_name] = 0
            asset_risk_scores[asset_name] += asset_score
    print(asset_risk_scores)
    
    ranked_assets = sorted(asset_risk_scores.items(), key=lambda x: x[1], reverse=True)

    return ranked_assets

# Run the ranking function
ranked_assets = rank_assets_based_on_risk(posterior_results)

# Display the ranked assets and their risk scores
print("Ranked Assets Based on Risk Scores:")
for asset, score in ranked_assets:
    print(f"Asset: {asset}, Risk Score: {score}")


print("\nInsights:")
for asset, score in ranked_assets:
    if score > 0.5:  
        print(f"Asset {asset} is at high risk with a score of {score}. Immediate action may be required.")
    elif score > 0.3:  
        print(f"Asset {asset} is at medium risk with a score of {score}. Consider monitoring closely.")
    else:
        print(f"Asset {asset} is at low risk with a score of {score}. Regular maintenance is sufficient.")


{'Asset_0': np.float64(4.3076923076923075), 'Asset_1': np.float64(2.7298578199052135), 'Asset_2': np.float64(3.6834532374100712), 'Asset_3': np.float64(3.7333333333333334), 'Asset_4': np.float64(2.6919431279620847), 'Asset_5': np.float64(3.3644859813084116), 'Asset_6': np.float64(8.0), 'Asset_7': np.float64(2.6526315789473682), 'Asset_8': np.float64(3.692307692307693), 'Asset_9': np.float64(2.4539877300613497), 'Asset_10': np.float64(4.285714285714286), 'Asset_11': np.float64(4.266666666666667), 'Asset_12': np.float64(8.0), 'Asset_13': np.float64(4.137931034482759), 'Asset_14': np.float64(2.8210526315789477), 'Asset_15': np.float64(4.316546762589929), 'Asset_16': np.float64(8.0), 'Asset_17': np.float64(2.5263157894736845), 'Asset_18': np.float64(8.0), 'Asset_19': np.float64(4.635514018691588), 'Asset_20': np.float64(3.714285714285715), 'Asset_21': np.float64(2.699386503067485), 'Asset_22': np.float64(2.846625766871166), 'Asset_23': np.float64(2.578199052132702), 'Asset_24': np.float64(