In [90]:
#importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [91]:
#importing the data set
dataset = pd.read_csv('ML-EdgeIIoT-dataset.csv', low_memory=False)

X = dataset.iloc[:, 3:-2].values #Selecting colomns from index 3 to the second last column as features
Y = dataset.iloc[:, -1].values #Selecting the last column as label



In [92]:
# 2. CONVERSION & INFINITY FIX (The crucial part)
# Convert to a DataFrame first to use powerful cleaning tools
X_df = pd.DataFrame(X)

# Convert all text to numbers, turning errors into 'NaN'
X_df = X_df.apply(pd.to_numeric, errors='coerce')

# Replace 'Infinity' or '-Infinity' with 'NaN'
X_df.replace([np.inf, -np.inf], np.nan, inplace=True)

# Fill all NaNs (and the infinities we just removed) with 0
X_df.fillna(0, inplace=True)

# This caps giant numbers at a safe limit for float32
X_df = X_df.clip(lower=-1e38, upper=1e38)

# Convert to float32 (the exact format the error is asking for)
X_clean = X_df.values.astype(np.float32)

# Convert back to a clean numpy array for the AI
X = X_df.values


In [93]:
# Encoding categorical data
from sklearn.preprocessing import LabelEncoder

# Turning names into numbers
le = LabelEncoder()
Y = le.fit_transform(Y)

In [94]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=42)

In [95]:
from sklearn.ensemble import RandomForestClassifier

classifier = RandomForestClassifier(n_estimators = 100, random_state = 42)
classifier.fit(X_train, Y_train)

Y_pred = classifier.predict(X_test)


In [None]:
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

# 1. Initialize and Train
dt_classifier = DecisionTreeClassifier(random_state=42)
dt_classifier.fit(X_train, Y_train)

# 2. Predict and Evaluate
Y_pred_dt = dt_classifier.predict(X_test)
dt_accuracy = accuracy_score(Y_test, Y_pred_dt)

print(f"Decision Tree Accuracy: {dt_accuracy*100:.2f}%")


Decision Tree Accuracy: 93.90%


In [97]:
from sklearn.neighbors import KNeighborsClassifier

# we use 5 neighbors as a standard starting point
knn_classifier = KNeighborsClassifier(n_neighbors=5)
knn_classifier.fit(X_train, Y_train)

Y_pred_knn = knn_classifier.predict(X_test)
knn_accuracy = accuracy_score(Y_test, Y_pred_knn)

print(f"KNN Accuracy: {knn_accuracy*100:.2f}%")

KNN Accuracy: 63.47%


In [99]:
%pip install xgboost
from xgboost import XGBClassifier

# Initialize and Train
xgb_classifier = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)
xgb_classifier.fit(X_train, Y_train)

# Predict and Confusion Matrix
Y_pred_xgb = xgb_classifier.predict(X_test)
xgb_acc = accuracy_score(Y_test, Y_pred_xgb)

print(f"XGBoost Accuracy: {xgb_acc*100:.2f}%")


Collecting xgboost
  Downloading xgboost-3.1.3-py3-none-win_amd64.whl.metadata (2.0 kB)
Downloading xgboost-3.1.3-py3-none-win_amd64.whl (72.0 MB)
   ---------------------------------------- 0.0/72.0 MB ? eta -:--:--
   ---------------------------------------- 0.5/72.0 MB 3.0 MB/s eta 0:00:24
    --------------------------------------- 1.3/72.0 MB 3.7 MB/s eta 0:00:20
   - -------------------------------------- 2.1/72.0 MB 3.9 MB/s eta 0:00:19
   - -------------------------------------- 3.1/72.0 MB 4.0 MB/s eta 0:00:18
   -- ------------------------------------- 3.9/72.0 MB 4.1 MB/s eta 0:00:17
   -- ------------------------------------- 4.7/72.0 MB 4.0 MB/s eta 0:00:18
   --- ------------------------------------ 5.5/72.0 MB 4.0 MB/s eta 0:00:17
   --- ------------------------------------ 6.3/72.0 MB 4.0 MB/s eta 0:00:17
   ---- ----------------------------------- 7.3/72.0 MB 4.0 MB/s eta 0:00:17
   ---- ----------------------------------- 8.1/72.0 MB 4.0 MB/s eta 0:00:16
   ---- -----

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


XGBoost Accuracy: 94.59%


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, accuracy_score

# 1. Define a list of our models
models = [
    ('Random Forest', classifier), # We already trained this one
    ('Decision Tree', dt_classifier),
    ('KNN', knn_classifier),
    ('XGBoost', xgb_classifier)
]

# 2. Loop through and plot
for name, model in models:
    # Get predictions
    Y_pred_current = model.predict(X_test)
    acc = accuracy_score(Y_test, Y_pred_current)
    
    # Calculate Confusion Matrix
    cm = confusion_matrix(Y_test, Y_pred_current)
    
    # Plotting
    plt.figure(figsize=(10, 7))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Reds', 
                xticklabels=le.classes_, yticklabels=le.classes_)
    plt.title(f'Confusion Matrix: {name} (Accuracy: {acc*100:.2f}%)')
    plt.ylabel('Actual')
    plt.xlabel('Predicted')
    plt.show()
    
    print(f"{name} analysis complete.\n" + "-"*30)

SyntaxError: invalid syntax. Perhaps you forgot a comma? (1945920177.py, line 10)