# ML-Driven Anomaly Detection for IoT Edge Devices: Insights from ADMM-Based Frequency Management


In [1]:
# Importing Libraries

In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import seaborn as sns
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
import joblib
import os
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

## Importing dataset

In [None]:
# Load the dataset from "dataset.csv" (path relative to the working directory).
dataset = pd.read_csv(r"dataset.csv")
# Bare expression: the notebook renders the DataFrame as the cell output.
dataset

In [None]:
# List the distinct values that occur in the 'anomaly' column.
dataset['anomaly'].unique()

In [None]:
# Preview the first rows of the dataset.
dataset.head()

In [None]:
# Summary statistics of the dataset's columns.
dataset.describe()

In [None]:
# Print a concise summary of the DataFrame (columns, dtypes, non-null counts).
dataset.info()

In [None]:
# Count the missing values in each column.
dataset.isnull().sum()

In [8]:
# Defining Dependent and independent variables

In [None]:
# Feature matrix: the first 12 columns of the dataset (positions 0-11).
X= dataset.iloc[:,0:12]
# Bare expression: display the feature matrix as the cell output.
X

In [None]:
# Target vector: the last column of the dataset.
# NOTE(review): presumably this is the 'anomaly' column — confirm against the CSV header.
y= dataset.iloc[:, -1]
# Bare expression: display the target vector as the cell output.
y

In [None]:
# Plot how many samples fall into each target class.
sns.set(style="darkgrid")  # Set the style of the plot
plt.figure(figsize=(8, 6))  # Set the figure size
# `y` is the target Series; one bar is drawn per distinct value.
ax = sns.countplot(x=y, palette="Set3")
plt.title("Count Plot")  # Add a title to the plot
plt.xlabel("Categories")  # Add label to x-axis
plt.ylabel("Count")  # Add label to y-axis
# Annotate each bar with its count. Bar heights are floats, so format them
# without decimals to show "120" rather than "120.0".
for p in ax.patches:
    ax.annotate(
        f'{p.get_height():.0f}',
        (p.get_x() + p.get_width() / 2., p.get_height()),
        ha='center', va='center', fontsize=10, color='black',
        xytext=(0, 5),  # nudge the text 5 points above the top of the bar
        textcoords='offset points',
    )

plt.show()  # Display the plot

In [12]:
# Split the dataset into training and testing sets.
# 30% of the rows are held out for testing; random_state=42 fixes the shuffle
# so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [13]:
#Feature scaling

In [14]:
# Standardize the features. The scaler is fitted on the training set only;
# the test set is transformed with those same training statistics. Re-fitting
# on the test set would scale train and test differently and would leave `sc`
# holding test-set statistics for any later use.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Shape (rows, columns) of the scaled training features.
X_train.shape

In [None]:
# Shape (rows, columns) of the scaled test features.
X_test.shape

In [17]:
# Human-readable class names. The list position acts as the class id:
# predictions are mapped back with labels[p], and the classification report
# and confusion-matrix axes use this order.
# NOTE(review): assumes the target column is encoded as integers 0-3 in this order — confirm.
labels = ["Frequency Drift","Capacity Breach","Dual Signal Interference","Request Overload"]

In [18]:
# Module-level accumulators for the evaluation scores. Each list starts empty
# and is a distinct object, so the four metrics are collected independently.
precision, recall, fscore, accuracy = [], [], [], []

In [19]:
#function to calculate various metrics such as accuracy, precision etc
def calculateMetrics(algorithm, predict, testY):
    """Score one model's predictions, record the scores and plot the confusion matrix.

    Macro-averaged precision, recall and F1 plus accuracy are computed as
    percentages, appended to the module-level ``precision``, ``recall``,
    ``fscore`` and ``accuracy`` lists, and printed together with a per-class
    classification report. A confusion-matrix heatmap is then displayed.

    Args:
        algorithm: Display name of the model; used as a prefix in the printed
            output and in the plot title.
        predict: Predicted class ids; must support ``.astype('int')``.
        testY: True class ids; must support ``.astype('int')``.

    Returns:
        None. Results are reported through the module-level lists, stdout
        and a matplotlib figure.
    """
    testY = testY.astype('int')
    predict = predict.astype('int')
    p = precision_score(testY, predict, average='macro') * 100
    r = recall_score(testY, predict, average='macro') * 100
    f = f1_score(testY, predict, average='macro') * 100
    a = accuracy_score(testY, predict) * 100
    accuracy.append(a)
    precision.append(p)
    recall.append(r)
    fscore.append(f)
    print(algorithm+' Accuracy    : '+str(a))
    print(algorithm+' Precision   : '+str(p))
    print(algorithm+' Recall      : '+str(r))
    print(algorithm+' FSCORE      : '+str(f))
    # classification_report expects (y_true, y_pred); passing them the other
    # way round swaps the per-class precision and recall columns.
    report = classification_report(testY, predict, target_names=labels)
    print('\n',algorithm+" classification report\n",report)
    # Rows of the confusion matrix are true classes, columns are predictions.
    conf_matrix = confusion_matrix(testY, predict)
    plt.figure(figsize=(5, 5))
    ax = sns.heatmap(conf_matrix, xticklabels=labels, yticklabels=labels,
                     annot=True, cmap="Blues", fmt="g")
    # Set explicit y-limits spanning all len(labels) rows of the heatmap.
    ax.set_ylim([0, len(labels)])
    plt.title(algorithm+" Confusion matrix")
    plt.ylabel('True class')
    plt.xlabel('Predicted class')
    plt.show()

# Logistic Regression

In [None]:
# Reuse a previously trained logistic-regression model if one is cached on
# disk; otherwise train it on the training data and cache it. Either way the
# model is then evaluated on the test set.
# NOTE(review): joblib.load unpickles arbitrary Python objects — only load
# .pkl files that come from a trusted source.
lr_weights_path = 'LogisticRegression_weights.pkl'
if os.path.exists(lr_weights_path):
    # Load the model from the pkl file
    clf = joblib.load(lr_weights_path)
else:
    clf = LogisticRegression()
    # Train the classifier on the training data
    clf.fit(X_train, y_train)
    joblib.dump(clf, lr_weights_path)
    print("LogisticRegression model trained and model weights saved.")

# Make predictions on the test data and record/print the metrics
predict = clf.predict(X_test)
calculateMetrics("LogisticRegression", predict, y_test)

# Decision Tree with AdaBoost Classifier

In [None]:
# Reuse a previously trained AdaBoost model if one is cached on disk;
# otherwise train it on the training data and cache it. Either way the model
# is then evaluated on the test set.
# NOTE(review): joblib.load unpickles arbitrary Python objects — only load
# .pkl files that come from a trusted source.
ada_weights_path = 'ada_weights.pkl'
if os.path.exists(ada_weights_path):
    # Load the model from the pkl file
    classifier = joblib.load(ada_weights_path)
else:
    # Decision tree (depth limited to 10) used as the base learner for AdaBoost
    base_estimator = DecisionTreeClassifier(max_depth=10)

    # Pass the base learner positionally: the keyword was renamed from
    # `base_estimator` to `estimator` in scikit-learn 1.2 and the old keyword
    # was removed in 1.4, but it is the first positional parameter in both.
    classifier = AdaBoostClassifier(base_estimator)

    # Train the classifier on the training data
    classifier.fit(X_train, y_train)
    # Save the model weights to a pkl file
    joblib.dump(classifier, ada_weights_path)
    print("DT with Adaboost classifier_model trained and model weights saved.")

# Make predictions on the test data and record/print the metrics
predict = classifier.predict(X_test)
calculateMetrics("DTC with AdaBoost Classifier", predict, y_test)

In [22]:
#Performance comparison of both algorithms

In [None]:
# Build a comparison table from the scores recorded in the module-level
# metric lists: row k pairs the k-th model name with the k-th entry of each list.
algorithm_names = ["LogisticRegression","DTC with AdaBoost Classifier"]
columns = ["Algorithm Name","Precison","Recall","FScore","Accuracy"]
values = [
    [name, precision[idx], recall[idx], fscore[idx], accuracy[idx]]
    for idx, name in enumerate(algorithm_names)
]

temp = pd.DataFrame(values, columns=columns)
# Bare expression: display the table as the cell output.
temp

In [24]:
#Prediction on new test data

In [None]:
# Load the new, unlabeled samples from "test.csv" (path relative to the working directory).
test=pd.read_csv("test.csv")
# Bare expression: display the loaded DataFrame as the cell output.
test

In [None]:
# Make predictions on the new test data.
# Drop a 'Predicted' column left over from an earlier run so the cell can be
# re-executed without feeding a string column to the model.
features = test.drop(columns=['Predicted'], errors='ignore')
# The model was trained on standardized features, so the new samples are
# passed through the fitted scaler `sc` before prediction.
# NOTE(review): assumes test.csv holds the same raw (unscaled) feature
# columns, in the same order, as the training features — confirm.
predict = classifier.predict(sc.transform(features.to_numpy()))

# Loop through each prediction and print the corresponding row
for i, p in enumerate(predict):
    print(test.iloc[i])  # Print the row
    print(f"Row {i}:************************************************** {labels[p]}")

In [27]:
# Add a 'Predicted' column holding the class name for each predicted class id.
test['Predicted'] = [labels[p] for p in predict]

In [None]:
# Bare expression: display the test DataFrame as the cell output.
test