# Sensor Data Analysis and Feature Engineering
This notebook provides a step-by-step process for advanced analysis, feature engineering, visualization, and report generation for sensor data. Each section includes explanatory markdown and corresponding code cells.

## 1. Setup and Imports
Import the necessary libraries and set up the plotting environment.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.cluster import DBSCAN
from scipy.fft import fft
from scipy.stats import iqr

# Default size, in inches, for every figure created by this notebook
plt.rc('figure', figsize=(10, 6))


## 2. Data Loading
Load the sensor data CSV file, parsing the `timestamp` column as dates and using it as the DataFrame index.

In [None]:
# Read the sensor log; the 'timestamp' column is parsed as dates and
# becomes the row index of the frame.
df = pd.read_csv(
    'sensor_data.csv',
    index_col='timestamp',
    parse_dates=['timestamp'],
)

# Preview the first rows (rendered as the cell output)
df.head()

## 3. Advanced Data Analysis
### 3.1 Time-Series Decomposition
Decompose each feature into trend, seasonal, and residual components.

In [None]:
# Sensor channels analysed here and in the cells that follow
features = ['temperature', 'vibration', 'pressure']

# One additive decomposition figure per channel
for feature in features:
    # Missing readings are dropped before decomposing
    series = df[feature].dropna()
    decomposition = seasonal_decompose(series, model='additive', period=60)

    fig = decomposition.plot()
    title = f'Time-Series Decomposition for {feature.capitalize()}'
    # y=1.05 places the title slightly above the top of the axes area
    fig.suptitle(title, y=1.05)
    plt.show()

### 3.2 Principal Component Analysis (PCA)
Scale features and perform PCA to reduce to 2 components.

In [None]:
# Build both transformers up front
scaler = StandardScaler()
pca = PCA(n_components=2)

# Keep only the rows where every feature is present, then scale them
X = df[features].dropna()
X_scaled = scaler.fit_transform(X)

# Project the scaled data onto two principal components
X_pca = pca.fit_transform(X_scaled)

print("Explained Variance Ratio:", pca.explained_variance_ratio_)


### 3.3 Anomaly Detection
Use Isolation Forest and DBSCAN to detect anomalies; both label anomalous points (outliers for Isolation Forest, noise points for DBSCAN) as `-1`.

In [None]:
# X_scaled only contains the rows of df that survived dropna() on the feature
# columns, so the label arrays returned below can be shorter than df.
# Assigning them directly to a df column raises a length-mismatch ValueError
# as soon as any feature value is missing. The labels are therefore written
# back onto the complete rows only; rows that were never scored get NaN.
complete_rows = df[features].notna().all(axis=1).to_numpy()


def _labels_on_all_rows(labels, row_mask):
    """Spread labels computed for the masked rows over every row.

    labels   -- 1-D array with one entry per True value in row_mask
    row_mask -- boolean array with one entry per row of the full frame

    Returns labels unchanged when every row was scored (so the column keeps
    its integer dtype); otherwise returns a float array with NaN on the
    unscored rows.
    """
    if row_mask.all():
        return labels
    spread = np.full(row_mask.shape[0], np.nan)
    spread[row_mask] = labels
    return spread


# Isolation Forest: -1 marks an outlier, 1 an inlier
iso_forest = IsolationForest(contamination=0.05, random_state=42)
df['anomaly_iso'] = _labels_on_all_rows(
    iso_forest.fit_predict(X_scaled), complete_rows
)

# DBSCAN: -1 marks a noise point, other values are cluster ids
dbscan = DBSCAN(eps=0.5, min_samples=5)
df['anomaly_dbscan'] = _labels_on_all_rows(
    dbscan.fit_predict(X_scaled), complete_rows
)

# Joint frequency of the two label columns (unscored rows are skipped)
df[['anomaly_iso', 'anomaly_dbscan']].value_counts()

## 4. Feature Engineering
Create derived features to enhance modeling.

In [None]:
# Rate of change: difference between consecutive samples
for feature in features:
    df[f'{feature}_diff'] = df[feature].diff()

# Exponential moving averages and degradation
# (degradation = short-span EMA minus long-span EMA)
for feature in features:
    df[f'{feature}_ema_short'] = df[feature].ewm(span=10).mean()
    df[f'{feature}_ema_long'] = df[feature].ewm(span=50).mean()
    df[f'{feature}_degradation'] = df[f'{feature}_ema_short'] - df[f'{feature}_ema_long']

# Rolling statistics over a fixed window of samples
window = 20
for feature in features:
    # Build the rolling view once and reuse it for all three statistics
    rolling = df[feature].rolling(window)
    df[f'{feature}_rolling_mean'] = rolling.mean()
    df[f'{feature}_rolling_std'] = rolling.std()
    # raw=True hands each window to iqr as a NumPy array instead of building
    # a Series per window: same values, much less per-window overhead.
    df[f'{feature}_rolling_iqr'] = rolling.apply(iqr, raw=True)

## 5. Visualization
Plot key insights from data and engineered features.

In [None]:
# Correlation heatmap of the raw sensor features
import seaborn as sns
corr = df[features].corr()
sns.heatmap(corr, annot=True, cmap='coolwarm')
plt.title("Feature Correlation Heatmap")
plt.show()

# PCA Scatter with anomalies
# X_pca has one row per df row whose features are all present (it was built
# from df[features].dropna()). Colouring by the whole df column breaks as soon
# as any row was dropped, so take the labels from those same complete rows.
scored_rows = df[features].notna().all(axis=1)
point_labels = df.loc[scored_rows, 'anomaly_iso']

plt.scatter(X_pca[:, 0], X_pca[:, 1], c=point_labels, alpha=0.6)
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.title('PCA Scatter Plot with Isolation Forest Anomalies')
plt.colorbar(label='Anomaly')
plt.show()

## 6. Report Generation
Save analysis summaries to text files.

In [None]:
# Advanced Analysis Report
# Label -1 is what both detectors use for anomalous / noise points.
iso_anomaly_mask = df['anomaly_iso'] == -1
dbscan_anomaly_mask = df['anomaly_dbscan'] == -1

report_lines = [
    "Advanced Analysis Report\n========================\n",
    f"PCA Explained Variance: {pca.explained_variance_ratio_}\n",
    f"Number of Isolation Forest anomalies: {iso_anomaly_mask.sum()}\n",
    f"Number of DBSCAN anomalies: {dbscan_anomaly_mask.sum()}\n",
]

# Write the whole report in one call
with open('advanced_analysis_report.txt', 'w') as report_file:
    report_file.writelines(report_lines)

# Feature Engineering Summary
# Substrings that identify the derived columns. '_degradation' is included
# because the feature-engineering cell creates those columns as well; without
# it the summary silently omits them.
engineered_markers = ('rolling', 'ema', '_diff', '_degradation')

# Columns are listed in the order they appear in df
engineered_columns = [
    col for col in df.columns
    if any(marker in col for marker in engineered_markers)
]

with open('feature_engineering_summary.txt', 'w') as f:
    f.write("Feature Engineering Summary\n===========================\n")
    f.writelines(f"- {col}\n" for col in engineered_columns)
