In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler

# Read the raw sensor readings from a CSV file into a DataFrame
# Point this at the real file location if it differs from the example name
df = pd.read_csv('sensor_data.csv')  # Example name, update as needed
print("Initial Data Sample:")
# Show the first few rows as a quick sanity check of the loaded data
preview = df.head()
display(preview)

# Data Preprocessing
# Drop rows with a missing value in any column (modifies df in place)
df.dropna(inplace=True)

# Select only numerical columns for scaling and modeling
numeric_cols = df.select_dtypes(include=[np.number]).columns

# Fail early with an explicit message instead of letting scikit-learn raise
# a less specific ValueError deeper in the pipeline.
if df.empty:
    raise ValueError(
        "No rows remain after dropping missing values; cannot scale or model the data."
    )
if numeric_cols.empty:
    raise ValueError(
        "No numeric columns found in the data; nothing to scale or model."
    )

# Standardize the numeric features; the scaler is fit on the same rows it
# transforms, and the result is passed on to the anomaly model.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df[numeric_cols])

# Fit an Isolation Forest on the scaled features and label every row
model = IsolationForest(
    n_estimators=100,
    contamination=0.01,
    random_state=42,
)
# Labels follow the convention used below: -1 = anomaly, 1 = normal
predictions = model.fit_predict(scaled_data)
df['anomaly'] = predictions

# Keep only the rows that were flagged as anomalies
anomalies = df.loc[df['anomaly'].eq(-1)]

# Plot one sensor column against the row index, colored by anomaly label
# Swap in another numeric column name to inspect a different sensor
sensor_col = numeric_cols[0]  # For example, first sensor column
label_colors = {1: 'blue', -1: 'red'}  # normal points blue, anomalies red
fig, ax = plt.subplots(figsize=(12, 6))
sns.scatterplot(
    x=df.index,
    y=df[sensor_col],
    hue=df['anomaly'],
    palette=label_colors,
    ax=ax,
)
ax.set_title('Anomaly Detection in Sensor Data')
ax.set_xlabel('Index')
ax.set_ylabel(sensor_col)
ax.legend(title='Anomaly')
ax.grid(True)
plt.show()

# Summary: report how many rows were analyzed and how many were flagged,
# then show the first few flagged rows
total_records = len(df)
anomaly_count = len(anomalies)
print(f"Total records: {total_records}")
print(f"Anomalies detected: {anomaly_count}")
display(anomalies.head())