In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


In [4]:

# Read the raw readings from the CSV file
file_path = "dataset_final.csv"
df = pd.read_csv(file_path)

# Show the first rows exactly as loaded, before any cleaning
print("\nDataset Preview:\n", df.head())

# Cleaning: discard rows with missing values, then one-hot encode any
# categorical columns (numeric columns pass through get_dummies unchanged)
df = pd.get_dummies(df.dropna())

# Standardize every column; the scaler is fitted on the full cleaned frame
scaler = StandardScaler()
scaler.fit(df)
scaled_data = scaler.transform(df)



Dataset Preview:
          Time  Temperature  Humidity  Air Quality  Light  Loudness
0  1623781306        37.94     28.94           75    644       106
1  1623781316        37.94     29.00           75    645       145
2  1623781326        37.88     28.88           75    644       146
3  1623781336        37.72     28.94           75    646       139
4  1623781346        37.69     29.19           75    644       155


In [5]:

# Anomaly Detection Models
# Seed Python, NumPy and TensorFlow in one call so the autoencoder's weight
# initialisation and batch shuffling -- and therefore its anomaly flags and
# scores -- repeat across kernel restarts.
keras.utils.set_random_seed(42)

# fit_predict returns -1 for outliers and 1 for inliers; both detectors are
# stored with the convention 1 = anomaly, 0 = normal.
iso_forest = IsolationForest(n_estimators=100, contamination=0.05, random_state=42)
df['Anomaly_IsolationForest'] = (iso_forest.fit_predict(scaled_data) == -1).astype(int)

lof = LocalOutlierFactor(n_neighbors=20, contamination=0.05)
df['Anomaly_LOF'] = (lof.fit_predict(scaled_data) == -1).astype(int)

# Autoencoder Model
# Symmetric dense network (32 -> 16 -> 32) trained to reproduce its own input;
# rows it reconstructs poorly are treated as anomalous.
input_dim = scaled_data.shape[1]
autoencoder = keras.Sequential([
    layers.Input(shape=(input_dim,)),
    layers.Dense(32, activation="relu"),
    layers.Dense(16, activation="relu"),
    layers.Dense(32, activation="relu"),
    layers.Dense(input_dim, activation="linear")
])
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.fit(scaled_data, scaled_data, epochs=20, batch_size=32, shuffle=True, validation_split=0.2, verbose=1)

# Compute Anomaly Scores
# Per-row mean squared reconstruction error is the anomaly score.
reconstructed = autoencoder.predict(scaled_data)
mse = np.mean(np.square(scaled_data - reconstructed), axis=1)

# Rows whose error is strictly above the 95th percentile are flagged, which
# mirrors the 5% contamination used by the two detectors above.
threshold = np.percentile(mse, 95)
df['Anomaly_Autoencoder'] = (mse > threshold).astype(int)

df['Anomaly_Score'] = mse



Epoch 1/20
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.2684 - val_loss: 0.1906
Epoch 2/20
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0109 - val_loss: 0.0663
Epoch 3/20
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0053 - val_loss: 0.0486
Epoch 4/20
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0034 - val_loss: 0.0424
Epoch 5/20
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0027 - val_loss: 0.0419
Epoch 6/20
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0017 - val_loss: 0.0429
Epoch 7/20
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0011 - val_loss: 0.0369
Epoch 8/20
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 8.5377e-04 - val_loss: 0.0313
Epoch 9/20
[1m164/164[0m [32m━━━━

In [6]:
# 3D PCA Visualization
# Project the standardized features onto their first three principal components
pca = PCA(n_components=3)
reduced_data = pca.fit_transform(scaled_data)

# Store one component per column so Plotly can reference them by name
for component_idx, component_name in enumerate(['PCA1', 'PCA2', 'PCA3']):
    df[component_name] = reduced_data[:, component_idx]

# Casting the 0/1 flag to string makes Plotly treat it as a category
autoencoder_flag = df['Anomaly_Autoencoder'].astype(str)

fig_3d = px.scatter_3d(
    df,
    x='PCA1',
    y='PCA2',
    z='PCA3',
    color=autoencoder_flag,
    title='3D PCA Visualization of Anomalies',
    labels={'color': 'Anomaly'},
    opacity=0.7,
)
fig_3d.show()


In [8]:

# Interactive Anomaly Score Plot
# Reconstruction error per row, coloured by the autoencoder's anomaly flag.
# The flag is cast to string on a plotting copy so Plotly Express draws two
# discrete legend entries (0 / 1) instead of a continuous colour bar for a
# binary category; df itself is left unchanged.
score_plot_frame = df.astype({'Anomaly_Autoencoder': str})
fig_score = px.scatter(score_plot_frame, x=score_plot_frame.index, y='Anomaly_Score',
                       color='Anomaly_Autoencoder',
                       title='Anomaly Score Distribution',
                       labels={'Anomaly_Autoencoder': 'Anomaly (1=Outlier, 0=Normal)'})
fig_score.show()



In [9]:

# Heatmap for Anomaly Detection
# One heatmap row per detector, one column per data row, so the three
# methods' flags can be compared at a glance.
flag_columns = ['Anomaly_IsolationForest', 'Anomaly_LOF', 'Anomaly_Autoencoder']
detector_names = ['Isolation Forest', 'Local Outlier Factor', 'Autoencoder']

comparison_trace = go.Heatmap(
    z=df[flag_columns].T,  # transposed: detectors along y, rows along x
    x=df.index,
    y=detector_names,
    colorscale='RdBu',
)
fig_heatmap = go.Figure(data=comparison_trace)
fig_heatmap.update_layout(title='Anomaly Detection Methods Comparison')
fig_heatmap.show()


In [11]:

# Save results
# Writes the cleaned data plus every column added by the notebook so far;
# the DataFrame index is not written.
df.to_csv("anomaly_results.csv", index=False)
print("\nResults saved as anomaly_results.csv")

# Insights from Anomaly Detection Models:
# The per-method counts are computed from the flag columns rather than typed
# in by hand, so they always match the current run. Each flag column holds
# 1 for an anomaly and 0 otherwise, so the column sum is the anomaly count.
method_notes = {
    'Anomaly_IsolationForest': ('Isolation Forest', 'best for global outliers'),
    'Anomaly_LOF': ('Local Outlier Factor (LOF)', 'captures local density variations'),
    'Anomaly_Autoencoder': ('Autoencoder', 'deep learning-based pattern recognition'),
}
for flag_column, (method_name, method_note) in method_notes.items():
    flagged = int(df[flag_column].sum())
    print(f"{method_name} flagged {flagged} of {len(df)} rows ({method_note}).")


Results saved as anomaly_results.csv
