In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as mtp
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix


In [None]:
data=pd.read_csv("shuffle_attack.csv")

data.head(5)

In [5]:
# Assuming 'AttackLabel' is the column indicating the type of attack
X = data.drop('Attack', axis=1)
y = data['Attack']

# One-hot encode categorical variables
X = pd.get_dummies(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.25, random_state=0)



In [19]:
# Step 2: Fitting the Random Forest Algorithm
from sklearn.ensemble import RandomForestClassifier

# Initialize the classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Fit the classifier to the training data
rf_classifier.fit(X_train, y_train)

# Step 3: Predicting the Test Result
rf_pred = rf_classifier.predict(X_test)

In [None]:
# Step 4: Test Accuracy & Confusion Matrix
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Calculate accuracy
rf_accuracy = accuracy_score(y_test, rf_pred)

rf_cm= confusion_matrix(y_test, rf_pred)

print(rf_accuracy)


### **RandomForest Visualizing**

In [None]:
# Step 5: Visualizing the Test Set Result
import matplotlib.pyplot as plt
import seaborn as sns

# Create a heatmap for the confusion matrix
plt.figure(figsize=(12, 8))
sns.heatmap(rf_cm, annot=True, fmt='d', cmap='Blues', xticklabels=rf_classifier.classes_, yticklabels=rf_classifier.classes_)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

# # Print classification report
# print(report)

# Print precision, recall, f1-score, and accuracy
from sklearn.metrics import accuracy_score

rf_accuracy = accuracy_score(y_test, rf_pred)
print(f"Accuracy: {rf_accuracy}")

# # Extract precision, recall, and f1-score from the report
# precision = float(report.split()[-4])
# recall = float(report.split()[-3])
# f1_score = float(report.split()[-2])

# print(f"Precision: {precision}")
# print(f"Recall: {recall}")
# print(f"F1-Score: {f1_score}")

### **=> Training and Testing Set Visualizing**

In [None]:
import matplotlib.pyplot as plt

# Assuming 'y_train' and 'y_test' are your training and testing labels

# Plotting the distribution of classes in the training set
plt.figure(figsize=(8, 6))
plt.title('Distribution of Classes in Training Set')
plt.hist(y_train, bins=len(y_train.unique()), alpha=0.7, color='blue', label='Training Set')
plt.xlabel('Attack Labels')
plt.ylabel('Frequency')
plt.xticks(rotation=45)
plt.legend()
plt.show()

print()

# Plotting the distribution of classes in the testing set
plt.figure(figsize=(8, 6))
plt.title('Distribution of Classes in Testing Set')
plt.hist(y_test, bins=len(y_test.unique()), alpha=0.7, color='green', label='Testing Set')
plt.xlabel('Attack Labels')
plt.ylabel('Frequency')
plt.xticks(rotation=45)
plt.legend()
plt.show()

In [None]:
# Plotting the distribution of classes in the training set using a pie chart
plt.figure(figsize=(8, 8))
plt.title('Distribution of Classes in Training Set')
y_train.value_counts().plot.pie(autopct='%1.1f%%', startangle=90, cmap='Blues')
plt.ylabel('')
plt.show()

print()

# Plotting the distribution of classes in the testing set using a pie chart
plt.figure(figsize=(8, 8))
plt.title('Distribution of Classes in Testing Set')
y_test.value_counts().plot.pie(autopct='%1.1f%%', startangle=90, cmap='Greens')
plt.ylabel('')
plt.show()


In [None]:
# Plotting the distribution of classes in the training set using a bar chart
plt.figure(figsize=(10, 6))
plt.title('Distribution of Classes in Training Set')
y_train.value_counts().plot(kind='bar', color='blue')
plt.xlabel('Attack Labels')
plt.ylabel('Frequency')
plt.xticks(rotation=45)
plt.show()

print()

# Plotting the distribution of classes in the testing set using a bar chart
plt.figure(figsize=(10, 6))
plt.title('Distribution of Classes in Testing Set')
y_test.value_counts().plot(kind='bar', color='green')
plt.xlabel('Attack Labels')
plt.ylabel('Frequency')
plt.xticks(rotation=45)
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Get the training and testing accuracy
train_accuracy = rf_classifier.score(X_train, y_train)
test_accuracy = rf_classifier.score(X_test, y_test)

# Create a line chart
labels = ['Training Accuracy', 'Testing Accuracy']
values = [train_accuracy, test_accuracy]

plt.figure(figsize=(10, 5))
plt.plot(labels, values, marker='o')
plt.title('Training and Testing Accuracy')
plt.xlabel('Dataset')
plt.ylabel('Accuracy')
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Get the training and testing accuracy
train_accuracy = rf_classifier.score(X_train, y_train)
test_accuracy = rf_classifier.score(X_test, y_test)

# Create a bar chart
labels = ['Training Accuracy', 'Testing Accuracy']
values = [train_accuracy, test_accuracy]

plt.figure(figsize=(10, 5))
plt.bar(labels, values, color=['blue', 'green'])
plt.title('Training and Testing Accuracy')
plt.xlabel('Dataset')
plt.ylabel('Accuracy')
plt.show()


## **-> 2. Logistic Regression**

In [11]:
#feature Scaling
from sklearn.preprocessing import StandardScaler
st_x= StandardScaler()
X_train= st_x.fit_transform(X_train)
X_test= st_x.transform(X_test)

In [None]:
#Fitting Logistic Regression to the training set
from sklearn.linear_model import LogisticRegression
lg_classifier= LogisticRegression(random_state=0)
lg_classifier.fit(X_train, y_train)

In [23]:
#Predicting the test set result
lg_pred= lg_classifier.predict(X_test)

In [50]:
# Step 4: Test Accuracy & Confusion Matrix
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Calculate accuracy
lg_accuracy = accuracy_score(y_test, lg_pred)

lg_cm= confusion_matrix(y_test, lg_pred)

print(lg_accuracy)

0.8068669527896996


### Logistic Regression **Visualizing**

In [None]:
# Step 5: Visualizing the Test Set Result
import matplotlib.pyplot as plt
import seaborn as sns

# Create a heatmap for the confusion matrix
plt.figure(figsize=(12, 8))
sns.heatmap(lg_cm, annot=True, fmt='d', cmap='Blues', xticklabels=lg_classifier.classes_, yticklabels=lg_classifier.classes_)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

## **=> 3. KNN Algorithm**

In [35]:
#Fitting K-NN classifier to the training set
from sklearn.neighbors import KNeighborsClassifier
knn_classifier= KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2 )
knn_classifier.fit(X_train, y_train)

In [36]:
#Predicting the test set result
knn_pred= knn_classifier.predict(X_test)

In [40]:
# Step 4: Test Accuracy & Confusion Matrix
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Calculate accuracy
knn_accuracy = accuracy_score(y_test, knn_pred)

knn_cm= confusion_matrix(y_test, knn_pred)

print(knn_accuracy)

0.6824034334763949


## **KNN-ALGORITHM VISUALIZING**

In [None]:
# Step 5: Visualizing the Test Set Result
import matplotlib.pyplot as plt
import seaborn as sns

# Create a heatmap for the confusion matrix
plt.figure(figsize=(12, 8))
sns.heatmap(knn_cm, annot=True, fmt='d', cmap='Blues', xticklabels=knn_classifier.classes_, yticklabels=knn_classifier.classes_)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

## => **4. SVM**

In [None]:
from sklearn.svm import SVC
svm_classifier = SVC(kernel='linear', random_state=0)
svm_classifier.fit(X_train, y_train)

In [39]:
#Predicting the test set result
svm_pred= svm_classifier.predict(X_test)

In [48]:
# Step 4: Test Accuracy & Confusion Matrix
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Calculate accuracy
svm_accuracy = accuracy_score(y_test, svm_pred)

svm_cm= confusion_matrix(y_test, svm_pred)

print(svm_accuracy)

0.8068669527896996


# **SVM VISUALIZING**

In [None]:
# Step 5: Visualizing the Test Set Result
import matplotlib.pyplot as plt
import seaborn as sns

# Create a heatmap for the confusion matrix
plt.figure(figsize=(12, 8))
sns.heatmap(svm_cm, annot=True, fmt='d', cmap='Blues', xticklabels=svm_classifier.classes_, yticklabels=svm_classifier.classes_)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

# **=> 4.Naive Bayes**

In [None]:
# Fitting Naive Bayes to the Training set
from sklearn.naive_bayes import GaussianNB
NB_classifier = GaussianNB()
NB_classifier.fit(X_train, y_train)

In [45]:
# Predicting the Test set results
NB_pred = NB_classifier.predict(X_test)

In [49]:
# Step 4: Test Accuracy & Confusion Matrix
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Calculate accuracy
NB_accuracy = accuracy_score(y_test, NB_pred)

NB_cm= confusion_matrix(y_test, NB_pred)

print(NB_accuracy)

0.7854077253218884


In [None]:
# Step 5: Visualizing the Test Set Result
import matplotlib.pyplot as plt
import seaborn as sns

# Create a heatmap for the confusion matrix
plt.figure(figsize=(12, 8))
sns.heatmap(NB_cm, annot=True, fmt='d', cmap='Blues', xticklabels=NB_classifier.classes_, yticklabels=NB_classifier.classes_)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

## **=> DIFFERENT ML ALGORITHM ACCURACY VALUES:**

In [85]:
classifier_list={"RandomForest          ":rf_accuracy,
                 "LOGISTIC_REGRESSION   ":lg_accuracy,
                 "SUPPORT_VECTOR_MACHINE":svm_accuracy,
                 "NAIVE_BAYES           ":NB_accuracy}

print("-----------------------------------------------")
print("| Machine learning Model |     Accuracy       |   ")
print("-----------------------------------------------")
for i, j in classifier_list.items():
  print("|",i,"|",j,"|")
  print("-----------------------------------------------")


-----------------------------------------------
| Machine learning Model |     Accuracy       |   
-----------------------------------------------
| RandomForest           | 0.8454935622317596 |
-----------------------------------------------
| LOGISTIC_REGRESSION    | 0.8068669527896996 |
-----------------------------------------------
| SUPPORT_VECTOR_MACHINE | 0.8068669527896996 |
-----------------------------------------------
| NAIVE_BAYES            | 0.7854077253218884 |
-----------------------------------------------
