### **Import all the necessary libraries**


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### **Load the dataset**

In [None]:
# Read the attack records from the CSV file into a DataFrame
cyber_sec_dataset = pd.read_csv(filepath_or_buffer="cybersecurity_attacks.csv")

## **Exploring the Dataset**

In [None]:
# Preview the first rows to see which columns exist and what their values look like
cyber_sec_dataset.head()

In [None]:
# Dataset information: column names, non-null counts and dtypes
cyber_sec_dataset.info()

In [None]:
cyber_sec_dataset.shape  # Dataset shape as (rows, columns)

In [None]:
# Summary statistics of the dataset (pandas describes the numeric columns by default)
cyber_sec_dataset.describe()
    

## **Cleaning the Data**

In [None]:
# Count the missing entries in every column
cyber_sec_dataset.isna().sum()

In [None]:
# Discard every record that has a missing value in at least one column.
# NOTE(review): this removes a row even if only one optional field is empty;
# confirm that a missing value never carries meaning (e.g. "no alert raised").
cyber_sec_dataset = cyber_sec_dataset.dropna(axis=0, how="any")

In [None]:
# Re-count the missing entries per column after the cleaning step
cyber_sec_dataset.isna().sum()

In [None]:
# (rows, columns) at this point in the notebook, i.e. after the missing-value handling above
cyber_sec_dataset.shape

In [None]:
# Total number of rows flagged as duplicates (rows identical to another row across all columns)
cyber_sec_dataset.duplicated().sum() 

In [None]:
# Preview the first rows of the frame as it stands after the cleaning cells
cyber_sec_dataset.head()

## **Data Visualization**

In [None]:
# Build the word-cloud corpus from every payload in the column.
# str(Series) would return pandas' display text instead (index labels, an
# elided middle section and the "Name: ..., dtype: ..." footer), so only a
# handful of payloads, plus that display noise, would reach the word cloud.
text = " ".join(cyber_sec_dataset['Payload Data'].astype(str))

# Last expression of the cell, so the column dtype is shown as the output
cyber_sec_dataset['Payload Data'].dtype


**Word Cloud**

In [None]:
from wordcloud import WordCloud

In [None]:
# Configure the generator, then build the cloud from the payload text
cloud_builder = WordCloud(width=800, height=400, background_color='white')
wordcloud = cloud_builder.generate(text)

# Draw the cloud as an image on its own axes, with the axis frame hidden
fig, ax = plt.subplots(figsize=(10, 5))
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis("off")
plt.show()

**Bar Chart**

In [None]:
# Number of records per attack type
attack_counts = cyber_sec_dataset['Attack Type'].value_counts()

# One bar per attack type, drawn on an explicitly created axes
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(x=attack_counts.index, y=attack_counts, ax=ax)

ax.set_xlabel('Attack Type', fontsize=14, fontweight='bold')
ax.set_ylabel('Count', fontsize=14, fontweight='bold')
ax.set_title('Distribution of Attack Types', fontsize=16)

# Tilt the category labels so they do not overlap
plt.xticks(rotation=45)
plt.show()

# Also show the exact numbers behind the bars
print(attack_counts)

**Pie Charts**

In [None]:
# Number of records per network protocol
cyber_sec_dataset['Protocol'].value_counts()

In [None]:
# Data for the pie chart.
# value_counts() orders the categories by frequency, so the labels are read
# from its index; a hand-written label list can end up on the wrong slice.
sizes = cyber_sec_dataset['Protocol'].value_counts()  # Proportional sizes of each category
labels = sizes.index.tolist()
colors = ['red', 'green', 'blue']  # Color for each category segment, in slice order
# Pull out the first slice only; sized from the data so it always matches `sizes`
explode = [0.1 if position == 0 else 0 for position in range(len(sizes))]

# Create a pie chart
plt.pie(sizes, labels=labels, colors=colors, explode=explode, autopct='%1.1f%%', startangle=180)

plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
plt.title('Distribution of Network Traffic Protocols')

# Display the pie chart
plt.show()


In [None]:
# Number of records per traffic type
cyber_sec_dataset['Traffic Type'].value_counts()

In [None]:
# Data for the pie chart.
# Labels come from the value_counts() index so each slice is named after the
# category it actually represents (value_counts() orders by frequency).
sizes = cyber_sec_dataset['Traffic Type'].value_counts()
labels = sizes.index.tolist()
colors = ['yellow', 'green', 'orange']  # Applied in slice order
# Pull out the first slice only; sized from the data so it always matches `sizes`
explode = [0.1 if position == 0 else 0 for position in range(len(sizes))]

# Create a pie chart
plt.pie(sizes, labels=labels, colors=colors, explode=explode, autopct='%1.1f%%', startangle=90)

plt.axis('equal')  # Keep the pie circular
plt.title('Distribution of Network Traffic Types')

# Display the pie chart
plt.show()

In [None]:
# Number of records per action taken
cyber_sec_dataset['Action Taken'].value_counts()

In [None]:
# Data for the pie chart.
# Labels come from the value_counts() index so each slice is named after the
# category it actually represents (value_counts() orders by frequency).
sizes = cyber_sec_dataset['Action Taken'].value_counts()
labels = sizes.index.tolist()
colors = ['Red', 'green', 'blue']  # Applied in slice order
# Pull out the first slice only; sized from the data so it always matches `sizes`
explode = [0.1 if position == 0 else 0 for position in range(len(sizes))]

# Create a pie chart
plt.pie(sizes, labels=labels, colors=colors, explode=explode, autopct='%1.1f%%', startangle=190)

plt.axis('equal')  # Keep the pie circular
plt.title('Distribution of Actions Taken')

# Display the pie chart
plt.show()

In [None]:
# Number of records per packet type
cyber_sec_dataset['Packet Type'].value_counts()

In [None]:
# Data for the pie chart.
# Labels come from the value_counts() index so each slice is named after the
# category it actually represents (value_counts() orders by frequency).
sizes = cyber_sec_dataset['Packet Type'].value_counts()
labels = sizes.index.tolist()
colors = ['blue', 'yellow']  # Applied in slice order
explode = [0] * len(sizes)  # No slice is pulled out; length follows the data

# Create a pie chart
plt.pie(sizes, labels=labels, colors=colors, explode=explode, autopct='%1.1f%%', startangle=90)

plt.axis('equal')  # Keep the pie circular
plt.title('Distribution of Packet Types')

# Display the pie chart
plt.show()

In [None]:

# Print a caption, then show the number of records per severity level as the cell output
print("Severity level and their Data in Our Dataset")
cyber_sec_dataset['Severity Level'].value_counts()

In [None]:
# Data for the pie chart.
# Labels come from the value_counts() index so each slice is named after the
# category it actually represents (value_counts() orders by frequency).
sizes = cyber_sec_dataset['Severity Level'].value_counts()
labels = sizes.index.tolist()
colors = ['orange', 'blue', 'pink']  # Applied in slice order
explode = [0] * len(sizes)  # No slice is pulled out; length follows the data

# Create a pie chart
plt.pie(sizes, labels=labels, colors=colors, explode=explode, autopct='%1.1f%%', startangle=90)

plt.axis('equal')  # Keep the pie circular
plt.title('Distribution of Severity Levels')

# Display the pie chart
plt.show()

In [None]:

# Data for the pie chart.
# Labels come from the value_counts() index so each slice is named after the
# category it actually represents (value_counts() orders by frequency).
sizes = cyber_sec_dataset['Log Source'].value_counts()
labels = sizes.index.tolist()
colors = ['blue', 'orange']  # Applied in slice order
explode = [0] * len(sizes)  # No slice is pulled out; length follows the data

# Show the exact counts behind the chart
print(sizes)

# Create a pie chart
plt.pie(sizes, labels=labels, colors=colors, explode=explode, autopct='%1.1f%%', startangle=270)

plt.axis('equal')  # Keep the pie circular
plt.title('Distribution of Log Sources')

# Display the pie chart
plt.show()

In [None]:
# List every column name available in the dataset
cyber_sec_dataset.columns

In [None]:
# Print the value distribution of each listed column, separated by a blank line
summary_columns = [
    "Malware Indicators",
    "Severity Level",
    "Attack Signature",
    "Attack Type",
    'Alerts/Warnings',
    'Log Source',
    "Firewall Logs",
    "IDS/IPS Alerts",
]

for position, column_name in enumerate(summary_columns):
    if position > 0:
        print()  # blank separator between consecutive tables, none after the last
    print(cyber_sec_dataset[column_name].value_counts())


### **Creating Keras Classification Model for Predicting Attack Type**

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.utils import to_categorical


In [None]:
import socket
import struct

# Columns used for modelling: the target ('Attack Type') followed by the candidate features.
# .copy() makes df_ an independent frame, so the column assignments below
# modify df_ itself instead of a slice of cyber_sec_dataset (which raises
# SettingWithCopyWarning and is not guaranteed to take effect).
df_ = cyber_sec_dataset[['Attack Type','Source IP Address', 'Destination IP Address', 'Source Port', 'Destination Port', 'Protocol', 'Packet Length', 'Packet Type', 'Traffic Type', 'Anomaly Scores', 'Alerts/Warnings', 'Severity Level', 'Network Segment', 'Geo-location Data', 'Proxy Information', 'Firewall Logs', 'IDS/IPS Alerts', 'Log Source']].copy()


enc = LabelEncoder()

# Replace each categorical column with integer codes.
# The single encoder is re-fitted for every column, so after the loop it only
# remembers the classes of the last column processed.
for i in ['Attack Type',"Protocol","Packet Type","Traffic Type","Alerts/Warnings","Severity Level","Network Segment","Geo-location Data","Firewall Logs","IDS/IPS Alerts","Log Source"]:
    df_[i] = enc.fit_transform(df_[i])
    
def ip_to_integer(ip):
    """Convert a dotted IPv4 address string to its unsigned 32-bit integer value.

    The address is parsed with ``socket.inet_aton`` and the four packed bytes
    are read in network (big-endian) byte order.
    """
    packed = socket.inet_aton(ip)
    return int.from_bytes(packed, byteorder="big")

# Convert the IP-valued columns to integers.
# NOTE(review): these three columns are dropped again in the statement below,
# so the conversion does not reach the modelling frame - confirm whether the
# encoded addresses were meant to be kept as features.
for ip_column in ('Source IP Address', 'Destination IP Address', 'Proxy Information'):
    df_[ip_column] = df_[ip_column].apply(ip_to_integer)

# Remove the geo-location and address columns, then preview what is left
df_ = df_.drop(columns=["Geo-location Data","Source IP Address","Destination IP Address","Proxy Information"])
df_.head()

In [None]:

# Pairwise correlation between all columns of the encoded frame
corr_m = df_.corr()

# Absolute correlation of every column with the encoded target
target = abs(corr_m["Attack Type"])
relevent = target[target > 0.01]  # keep columns whose |correlation| exceeds 0.01

# Remove the target by name rather than by position: pop(0) only worked while
# 'Attack Type' happened to be the first column, and would otherwise leak the
# target into the features while discarding a real feature.
X_ = [column for column in relevent.index if column != "Attack Type"]

X_

In [None]:
# Feature matrix: all rows, restricted to the columns listed in X_
X = df_.loc[:, X_]

X.head()

In [None]:
# Check the dtypes and non-null counts of the selected feature columns
X.info()

In [None]:

# Target: the integer-coded attack type, expanded to one-hot rows
Y = to_categorical(df_['Attack Type'])

In [None]:
# Split features and targets into training and testing parts;
# the fixed random_state makes the split repeatable across runs
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    random_state=42,
)


### **Implementing the Keras Model**

In [None]:

from keras.optimizers import Adam

# Size the output layer from the one-hot targets instead of hard-coding 3, so
# the network still matches y_train if the number of attack types changes.
n_classes = y_train.shape[1]

# Define the model: two ReLU hidden layers, each followed by dropout,
# and a softmax layer producing one probability per class
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(x_train.shape[1],)))
model.add(Dropout(0.3))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(n_classes, activation='softmax'))  # one unit per attack-type class

# Compile the model; categorical cross-entropy pairs with the one-hot targets
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])


# get the summary of Keras Model
model.summary()

In [None]:

# Fit on the training split; 20% of it is held back for validation and the
# per-epoch progress output is silenced
history = model.fit(
    x_train,
    y_train,
    epochs=150,
    batch_size=64,
    validation_split=0.2,
    verbose=0,
)

### **Let us Evaluate our Model Performance**

In [None]:

# Evaluate the model on the held-out split only.
# Scoring on the full X, Y would include the rows the model was trained on,
# so the numbers printed as "Test" would not be test metrics.
loss, accuracy = model.evaluate(x_test, y_test)
print(f'Test Loss: {loss:.4f}')
print(f'Prediction Accuracy: {accuracy:.4f}')

# Get predictions from the model (one probability per class for every test row)
Y_pred_prob = model.predict(x_test)

# Convert predicted probabilities to one-hot encoded format:
# put a 1 in the column of the most probable class of each row
Y_pred = np.zeros_like(Y_pred_prob)
Y_pred[np.arange(len(Y_pred_prob)), Y_pred_prob.argmax(axis=-1)] = 1

# Calculate accuracy: a row counts as correct only if its whole one-hot vector matches
accuracy = np.mean(np.all(Y_pred == y_test, axis=1))
print(f'Accuracy: {accuracy:.4f}')

## So, using the Keras model, we have now predicted the Attack Type