### Cybersecurity Network Traffic Analysis

### Installing and Importing pandas

In [76]:
# Import pandas for data loading and analysis
import pandas as pd

### Load the Dataset from GitHub for Analysis

In [77]:
# Raw GitHub location of the network-traffic CSV analysed in this notebook.
# The literal is split with implicit string concatenation purely for line length.
url = (
    "https://raw.githubusercontent.com/ritaafrica/data/"
    "refs/heads/main/network_traffic_data.csv"
)

In [78]:
# Download the CSV referenced by `url` and parse it into the working DataFrame.
df = pd.read_csv(filepath_or_buffer=url)

### Exploring traffic in the dataset

In [79]:
# Preview the first five rows of the dataset (rich notebook display)
df.head(n=5)

Unnamed: 0,Timestamp,Source_IP,Destination_IP,Protocol,Port,Bytes_Sent,Bytes_Received,Status,Threat_Level
0,2025-03-19 13:04:10,10.0.0.15,192.168.1.20,TCP,,5411,8989,Blocked,Low
1,2025-03-19 13:03:40,192.168.1.13,172.217.169.46,ICMP,443.0,4999,11808,Allowed,Medium
2,2025-03-19 13:03:10,10.0.0.5,203.0.113.99,HTTP,443.0,6360,10852,Allowed,Medium
3,2025-03-19 13:02:40,10.0.0.9,192.168.1.20,TCP,,4011,14314,Blocked,Low
4,2025-03-19 13:02:10,192.168.1.4,172.217.169.46,FTP,,5254,8718,Blocked,Medium


### Analyze trends based on specific feature correlations

In [80]:
# Report the overall shape plus the row and column counts, one per line,
# using f-strings
print(
    f"Dataset Shape: {df.shape}",
    f"Total Rows: {df.shape[0]}",
    f"Total Columns: {df.shape[1]}",
    sep="\n",
)

Dataset Shape: (1000, 9)
Total Rows: 1000
Total Columns: 9


In [81]:
# List the column labels of the dataset
print("\nColumn Name:", df.columns, sep="\n")


Column Name:
Index(['Timestamp', 'Source_IP', 'Destination_IP', 'Protocol', 'Port',
       'Bytes_Sent', 'Bytes_Received', 'Status', 'Threat_Level'],
      dtype='object')


In [82]:
# Display information about the dataset (dtypes, non-null counts, memory usage).
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
print("\nDataset Info:")
df.info()


Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Timestamp       1000 non-null   object 
 1   Source_IP       1000 non-null   object 
 2   Destination_IP  1000 non-null   object 
 3   Protocol        1000 non-null   object 
 4   Port            874 non-null    float64
 5   Bytes_Sent      1000 non-null   int64  
 6   Bytes_Received  1000 non-null   int64  
 7   Status          1000 non-null   object 
 8   Threat_Level    1000 non-null   object 
dtypes: float64(1), int64(2), object(6)
memory usage: 70.4+ KB
None


In [83]:
# Show summary statistics (count, mean, std, min, quartiles, max)
# for the numeric columns
print("\nSummary Statistics:", df.describe(), sep="\n")


Summary Statistics:
              Port   Bytes_Sent  Bytes_Received
count   874.000000  1000.000000     1000.000000
mean   1819.739130  5143.572000     7562.659000
std    2899.374632  2808.256143     4240.206295
min      21.000000   106.000000      102.000000
25%      22.000000  2857.000000     4025.500000
50%      80.000000  5224.000000     7584.500000
75%    3389.000000  7487.750000    11147.750000
max    8080.000000  9984.000000    14977.000000


### Structure the Dataset for further Analysis

In [84]:
# Challenge 4
# Narrow the traffic data down to the targeted columns
target_columns = df.loc[
    :, ["Timestamp", "Source_IP", "Destination_IP", "Status"]
]

# Preview the first five rows of the targeted traffic
print(target_columns.head(n=5))

             Timestamp     Source_IP  Destination_IP   Status
0  2025-03-19 13:04:10     10.0.0.15    192.168.1.20  Blocked
1  2025-03-19 13:03:40  192.168.1.13  172.217.169.46  Allowed
2  2025-03-19 13:03:10      10.0.0.5    203.0.113.99  Allowed
3  2025-03-19 13:02:40      10.0.0.9    192.168.1.20  Blocked
4  2025-03-19 13:02:10   192.168.1.4  172.217.169.46  Blocked


In [85]:
# Challenge 5
# Keep targeted traffic in variables for further analysis.
# A single column of network activity, stored as a Series:
source_ips = df.loc[:, "Source_IP"]


In [86]:
# Several columns of network activity, stored together as a DataFrame
network_activity = df.loc[
    :, ["Source_IP", "Destination_IP", "Protocol", "Threat_Level"]
]


In [87]:
# Preview the first five rows of the selected network-activity columns
print(network_activity.head(n=5))

      Source_IP  Destination_IP Protocol Threat_Level
0     10.0.0.15    192.168.1.20      TCP          Low
1  192.168.1.13  172.217.169.46     ICMP       Medium
2      10.0.0.5    203.0.113.99     HTTP       Medium
3      10.0.0.9    192.168.1.20      TCP          Low
4   192.168.1.4  172.217.169.46      FTP       Medium


### Filtering blocked traffic

In [88]:
# Challenge 6
# Detect unusual patterns in the network:
# keep only the rows whose Status is "Blocked"
is_blocked = df["Status"].eq("Blocked")
blocked_traffic = df.loc[is_blocked]

In [89]:
# Reduce the blocked traffic to the key columns needed for analysis
blocked_summary = blocked_traffic.loc[
    :, ["Timestamp", "Source_IP", "Destination_IP", "Threat_Level"]
]

In [90]:
# Preview the first five blocked requests from the summary built above
print(blocked_summary.head(n=5))

              Timestamp    Source_IP  Destination_IP Threat_Level
0   2025-03-19 13:04:10    10.0.0.15    192.168.1.20          Low
3   2025-03-19 13:02:40     10.0.0.9    192.168.1.20          Low
4   2025-03-19 13:02:10  192.168.1.4  172.217.169.46       Medium
9   2025-03-19 12:59:40    10.0.0.43        10.0.0.5          Low
10  2025-03-19 12:59:10    10.0.0.33    203.0.113.99       Medium


In [91]:
# Display the (rows, columns) shape of the blocked-traffic subset;
# the first element is the number of blocked records
blocked_traffic.shape

(532, 9)

### Filtering Suspicious Traffic

In [92]:
# Challenge 7
# Identify potential high-severity security threats that need further
# investigation: rows whose Threat_Level is "Critical"
is_critical = df["Threat_Level"].eq("Critical")
high_risk_traffic = df.loc[is_critical]

In [93]:
# Preview the first five critical-threat records
print(high_risk_traffic.head(n=5))

               Timestamp     Source_IP  Destination_IP Protocol    Port  \
59   2025-03-19 12:34:40     10.0.0.47    192.168.1.20     ICMP     NaN   
96   2025-03-19 12:16:10  192.168.1.35    203.0.113.99      FTP  8080.0   
134  2025-03-19 11:57:10  192.168.1.17  172.217.169.46      DNS    22.0   
150  2025-03-19 11:49:10  192.168.1.42        10.0.0.5     HTTP    53.0   
209  2025-03-19 11:19:40     10.0.0.17    203.0.113.99      TCP  3389.0   

     Bytes_Sent  Bytes_Received   Status Threat_Level  
59         5885             463  Allowed     Critical  
96         9371            7189  Allowed     Critical  
134        6714           13124  Blocked     Critical  
150        2702             634  Allowed     Critical  
209        5085           10014  Blocked     Critical  


In [62]:
# Display the (rows, columns) shape of the critical-threat subset;
# the first element is the number of critical records
high_risk_traffic.shape

(47, 9)

### Detecting high data transfers

In [94]:
# Challenge 8
# Flag potentially suspicious high data transfers:
# rows where more than 5000 bytes were sent
sent_over_limit = df["Bytes_Sent"].gt(5000)
high_data_transfer = df.loc[sent_over_limit]

In [95]:
# Preview the first five high-data-transfer records
print(high_data_transfer.head(n=5))

             Timestamp     Source_IP  Destination_IP Protocol   Port  \
0  2025-03-19 13:04:10     10.0.0.15    192.168.1.20      TCP    NaN   
2  2025-03-19 13:03:10      10.0.0.5    203.0.113.99     HTTP  443.0   
4  2025-03-19 13:02:10   192.168.1.4  172.217.169.46      FTP    NaN   
5  2025-03-19 13:01:40     10.0.0.43  172.217.169.46      DNS   53.0   
7  2025-03-19 13:00:40  192.168.1.36    192.168.1.20      TCP   21.0   

   Bytes_Sent  Bytes_Received   Status Threat_Level  
0        5411            8989  Blocked          Low  
2        6360           10852  Allowed       Medium  
4        5254            8718  Blocked       Medium  
5        6915           12981  Allowed          Low  
7        5655             119  Allowed       Medium  


In [96]:
# Display the (rows, columns) shape of the high-data-transfer subset
high_data_transfer.shape

(518, 9)

In [97]:
# Report how many rows exceed the Bytes_Sent threshold
# (len() of a DataFrame is its row count)
print(f"Number of high-data transfers: {len(high_data_transfer)}")

Number of high-data transfers: 518


### Classifying network threats

In [98]:
# Challenge 9
# Split the dataset into X (features) and y (target variable).
# Features (X): every column except the target, Threat_Level
X = df.drop("Threat_Level", axis="columns")



In [99]:
# Target variable (y): the Threat_Level column as a Series
y = df.loc[:, "Threat_Level"]

In [100]:
# Preview the first five rows of the feature frame X
print("Features (X):", X.head(n=5), sep="\n")

Features (X):
             Timestamp     Source_IP  Destination_IP Protocol   Port  \
0  2025-03-19 13:04:10     10.0.0.15    192.168.1.20      TCP    NaN   
1  2025-03-19 13:03:40  192.168.1.13  172.217.169.46     ICMP  443.0   
2  2025-03-19 13:03:10      10.0.0.5    203.0.113.99     HTTP  443.0   
3  2025-03-19 13:02:40      10.0.0.9    192.168.1.20      TCP    NaN   
4  2025-03-19 13:02:10   192.168.1.4  172.217.169.46      FTP    NaN   

   Bytes_Sent  Bytes_Received   Status  
0        5411            8989  Blocked  
1        4999           11808  Allowed  
2        6360           10852  Allowed  
3        4011           14314  Blocked  
4        5254            8718  Blocked  


In [101]:
# Preview the first five values of the target variable y
print("\nTarget Variable (y):", y.head(n=5), sep="\n")


Target Variable (y):
0       Low
1    Medium
2    Medium
3       Low
4    Medium
Name: Threat_Level, dtype: object


### Eliminating Timestamps to focus on network behaviour

In [102]:
# Challenge 10
# Remove the time at which each incident was recorded on the network.
# This rebinds `df`, so the cell may be executed more than once in a session;
# errors="ignore" makes a repeat run a no-op instead of raising KeyError once
# the Timestamp column is already gone.
df = df.drop(columns=["Timestamp"], errors="ignore")

In [103]:
# Preview the first five rows to confirm the Timestamp column is gone
print(df.head(n=5))

      Source_IP  Destination_IP Protocol   Port  Bytes_Sent  Bytes_Received  \
0     10.0.0.15    192.168.1.20      TCP    NaN        5411            8989   
1  192.168.1.13  172.217.169.46     ICMP  443.0        4999           11808   
2      10.0.0.5    203.0.113.99     HTTP  443.0        6360           10852   
3      10.0.0.9    192.168.1.20      TCP    NaN        4011           14314   
4   192.168.1.4  172.217.169.46      FTP    NaN        5254            8718   

    Status Threat_Level  
0  Blocked          Low  
1  Allowed       Medium  
2  Allowed       Medium  
3  Blocked          Low  
4  Blocked       Medium  


In [104]:
# Rebuild the high-data-transfer subset (Bytes_Sent above 5000) from the
# current `df`
sent_over_limit = df["Bytes_Sent"].gt(5000)
high_data_transfer = df.loc[sent_over_limit]

In [105]:
# Preview the first five rows of the rebuilt high-data-transfer subset
print(high_data_transfer.head(n=5))

      Source_IP  Destination_IP Protocol   Port  Bytes_Sent  Bytes_Received  \
0     10.0.0.15    192.168.1.20      TCP    NaN        5411            8989   
2      10.0.0.5    203.0.113.99     HTTP  443.0        6360           10852   
4   192.168.1.4  172.217.169.46      FTP    NaN        5254            8718   
5     10.0.0.43  172.217.169.46      DNS   53.0        6915           12981   
7  192.168.1.36    192.168.1.20      TCP   21.0        5655             119   

    Status Threat_Level  
0  Blocked          Low  
2  Allowed       Medium  
4  Blocked       Medium  
5  Allowed          Low  
7  Allowed       Medium  
