In [3]:
# !pip install pgmpy

# Importing Dependencies

In [30]:
from pathlib import Path

import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD

# Reading the Data

In [31]:
# Folder that holds the four input CSV files. It is defined once so the
# notebook can be pointed at another location (ideally a path relative to the
# notebook) by editing a single line instead of every read_csv call.
DATA_DIR = Path(r'C:\Users\swaroop.srisailam\Desktop\Swaroop\Bayesian Risk Assessment using Cybersecurity Data\dataset')

# One DataFrame per source table; the file names are unchanged.
asset_vulnerability_df = pd.read_csv(DATA_DIR / 'asset_vulnerability.csv')
attack_vulnerability_df = pd.read_csv(DATA_DIR / 'attack_vulnerability.csv')
threat_actor_df = pd.read_csv(DATA_DIR / 'threat_actor_asset.csv')
prior_attack_df = pd.read_csv(DATA_DIR / 'prior_attack_success.csv')


In [32]:
# Report (rows, columns) for each loaded table, in the order they were read.
for frame in (asset_vulnerability_df, attack_vulnerability_df,
              threat_actor_df, prior_attack_df):
    print(frame.shape)

(25, 4)
(25, 3)
(25, 3)
(25, 3)


# Defining Bayesian Network Structure

In [33]:
# Directed (parent, child) edges of the network. Together they form one chain:
# Threat_Actor -> Attack_Vector -> Vulnerability -> Asset.
network_edges = [
    ('Threat_Actor', 'Attack_Vector'),
    ('Attack_Vector', 'Vulnerability'),
    ('Vulnerability', 'Asset'),
]
model = BayesianNetwork(network_edges)

In [34]:
# Print the network summary (node and edge counts) as a quick structural check.
print(model)

BayesianNetwork with 4 nodes and 3 edges


# Defining Conditional Probability Distributions


In [35]:
# Count how many rows of threat_actor_df carry each 'Threat Actor' label.
threat_actor_counts = threat_actor_df['Threat Actor'].value_counts()

In [36]:
# Turn the per-actor counts into relative frequencies (they sum to 1).
total_count = threat_actor_counts.sum()
threat_actor_probs = threat_actor_counts.div(total_count)

In [37]:
# Display the relative frequency of each threat actor.
threat_actor_probs

Threat Actor
External Hacker    0.52
Insider            0.48
Name: count, dtype: float64

In [39]:
# Prior distribution P(Threat_Actor) for the root node of the network.
#
# The probabilities are ordered by actor label (sort_index) rather than by
# frequency. value_counts() orders by count, so the state numbering would flip
# whenever the class balance changes; the groupby/unstack tables built from
# prior_attack_df are ordered by label, and state i here must be the same actor
# as column i there.
threat_actor_prior = threat_actor_probs.sort_index()
cpd_threat_actor = TabularCPD(variable='Threat_Actor',
                              variable_card=len(threat_actor_prior),
                              # One row per state, one column (no evidence);
                              # built generically so it is not tied to
                              # exactly two actors.
                              values=threat_actor_prior.to_numpy().reshape(-1, 1))

In [41]:
# Print the Threat_Actor CPD table to verify the probabilities.
print(cpd_threat_actor)

+-----------------+------+
| Threat_Actor(0) | 0.52 |
+-----------------+------+
| Threat_Actor(1) | 0.48 |
+-----------------+------+


In [42]:
# Inspect the prior-attack records (threat actor, attack vector, success rate).
prior_attack_df

Unnamed: 0,Threat Actor,Attack Vector,Success Rate
0,Insider,Phishing,0.35
1,External Hacker,SQL Injection,0.6
2,External Hacker,Remote Code Execution,0.65
3,Insider,Remote Code Execution,0.5
4,Insider,SQL Injection,0.4
5,External Hacker,Phishing,0.55
6,External Hacker,RCE,0.65
7,Insider,SQL Injection,0.35
8,External Hacker,RCE,0.7
9,Insider,SQL Injection,0.5


In [43]:
# Count how many rows of prior_attack_df carry each 'Attack Vector' label.
attack_vector_counts = prior_attack_df['Attack Vector'].value_counts()

In [44]:
# Display the per-label counts.
# NOTE(review): the data contains both 'RCE' and 'Remote Code Execution' --
# presumably the same attack vector under two labels. Confirm and normalise
# the labels upstream; otherwise they are treated as separate states.
attack_vector_counts

Attack Vector
SQL Injection            9
Phishing                 8
RCE                      6
Remote Code Execution    2
Name: count, dtype: int64

In [47]:
# Relative frequency of each attack vector label in prior_attack_df.
# Note: total_count is rebound here (it previously held the threat-actor total).
total_count = attack_vector_counts.sum()
attack_vector_probs = attack_vector_counts.div(total_count)

In [52]:
# Number of entries in attack_vector_probs (one per attack vector label).
len(attack_vector_probs)

4

In [116]:
# Display attack_vector_probs.
# NOTE(review): the saved output is a 2x4 array, not the frequency Series
# computed just above -- this cell appears to have been run after a later cell
# rebound attack_vector_probs. On a fresh Restart & Run All it will show the
# Series instead.
attack_vector_probs

array([[0.5952381 , 0.56190476, 0.56521739, 0.62632696],
       [0.4047619 , 0.43809524, 0.43478261, 0.37367304]])

In [73]:
# Mean recorded success rate for every (threat actor, attack vector) pair,
# pivoted so threat actors are rows and attack vectors are columns.
conditional_probs = (
    prior_attack_df
    .groupby(['Threat Actor', 'Attack Vector'])['Success Rate']
    .mean()
    .unstack()
)

# Show the pivoted table of mean success rates.
print(conditional_probs)

# Scale each attack-vector column so it sums to 1 across threat actors
# (DataFrame / Series aligns the Series on the column labels).
conditional_probs_normalized = conditional_probs / conditional_probs.sum(axis=0)

# Keep the normalised table as a NumPy array (not a list). This rebinds
# attack_vector_probs, which earlier held the label-frequency Series.
attack_vector_probs = conditional_probs_normalized.to_numpy()
attack_vector_probs.T

Attack Vector    Phishing     RCE  Remote Code Execution  SQL Injection
Threat Actor                                                           
External Hacker     0.625  0.7375                   0.65         0.7375
Insider             0.425  0.5750                   0.50         0.4400


array([[0.5952381 , 0.4047619 ],
       [0.56190476, 0.43809524],
       [0.56521739, 0.43478261],
       [0.62632696, 0.37367304]])

In [123]:
# Display the mean success rate per (threat actor, attack vector) pair.
conditional_probs

Attack Vector,Phishing,RCE,Remote Code Execution,SQL Injection
Threat Actor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
External Hacker,0.625,0.7375,0.65,0.7375
Insider,0.425,0.575,0.5,0.44


In [87]:
# Mean recorded success rate for each (threat actor, attack vector) pair,
# with threat actors as rows and attack vectors as columns.
conditional_probs = (
    prior_attack_df
    .groupby(['Threat Actor', 'Attack Vector'])['Success Rate']
    .mean()
    .unstack()
)

# Per-actor total of the mean success rates (one value per row).
total_success = conditional_probs.sum(axis='columns')

# Divide each row by its own total so every threat actor's row sums to 1.
# NOTE(review): these are normalised mean success rates, not observed
# frequencies of how often each actor uses each vector -- confirm that this is
# the intended meaning before using them as P(Attack_Vector | Threat_Actor).
conditional_probs_normalized = conditional_probs.div(total_success, axis='index')

# Show the row-normalised table.
print("Conditional Probabilities:")
print(conditional_probs_normalized)

Conditional Probabilities:
Attack Vector    Phishing       RCE  Remote Code Execution  SQL Injection
Threat Actor                                                             
External Hacker  0.227273  0.268182               0.236364       0.268182
Insider          0.219072  0.296392               0.257732       0.226804


In [115]:
# Display the per-actor totals used as the normalisation denominators.
total_success

Threat Actor
External Hacker    2.75
Insider            1.94
dtype: float64

In [96]:
# Display the normalised table transposed: attack vectors as rows,
# threat actors as columns.
conditional_probs_normalized.T

Threat Actor,External Hacker,Insider
Attack Vector,Unnamed: 1_level_1,Unnamed: 2_level_1
Phishing,0.227273,0.219072
RCE,0.268182,0.296392
Remote Code Execution,0.236364,0.257732
SQL Injection,0.268182,0.226804


In [107]:
# Plain NumPy array of the normalised table (labels dropped).
conditional_probs_matrix = conditional_probs_normalized.to_numpy()
# Show it transposed: one row per attack vector, one column per threat actor.
conditional_probs_matrix.transpose()

array([[0.22727273, 0.21907216],
       [0.26818182, 0.29639175],
       [0.23636364, 0.25773196],
       [0.26818182, 0.22680412]])

In [112]:
# Number of attack vector columns in the normalised table.
len(conditional_probs_normalized.columns)

4

In [113]:
# CPD for P(Attack_Vector | Threat_Actor).
#
# TabularCPD takes one row per state of the variable and one column per state
# of the evidence, so the actor-by-vector table is transposed. The values are
# read from conditional_probs_normalized rather than pasted in as rounded
# literals, so the CPD stays in sync with the data and keeps full precision.
# Column order follows the row order of conditional_probs_normalized (threat
# actors sorted by label); row order follows its attack-vector columns.
attack_vector_cpd_values = conditional_probs_normalized.T.to_numpy()
cpd_attack_vector = TabularCPD(variable='Attack_Vector',
                               variable_card=attack_vector_cpd_values.shape[0],
                               values=attack_vector_cpd_values,
                               evidence=["Threat_Actor"],
                               # Derived from the table instead of a magic 2.
                               evidence_card=[attack_vector_cpd_values.shape[1]])

In [114]:
# Print the Attack_Vector CPD; each Threat_Actor column should sum to 1.
print(cpd_attack_vector)

+------------------+-----------------+-----------------+
| Threat_Actor     | Threat_Actor(0) | Threat_Actor(1) |
+------------------+-----------------+-----------------+
| Attack_Vector(0) | 0.22727273      | 0.21907216      |
+------------------+-----------------+-----------------+
| Attack_Vector(1) | 0.26818182      | 0.29639175      |
+------------------+-----------------+-----------------+
| Attack_Vector(2) | 0.23636364      | 0.25773196      |
+------------------+-----------------+-----------------+
| Attack_Vector(3) | 0.26818182      | 0.22680412      |
+------------------+-----------------+-----------------+
