## 1. Load Sample Data

Generate a synthetic sample of industrial sensor data (temperature, pressure, and flow rate) and inspect the first rows.

In [None]:
# Import required libraries
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Add parent directory to path to import IDS modules.
# NOTE: '__file__' is a string literal here, so abspath() resolves it against
# the current working directory; the path appended below is therefore the
# parent of the working directory.
working_dir = os.path.dirname(os.path.abspath('__file__'))
parent_dir = os.path.dirname(working_dir)
sys.path.append(parent_dir)

In [None]:
# Create reproducible synthetic sample data: one row per hourly reading.
n_samples = 100

# A seeded generator makes the random values identical on every run.
rng = np.random.default_rng(42)

data = {
    # Explicit one-hour step instead of the 'H' alias, which pandas has
    # deprecated (FutureWarning since pandas 2.2).
    'timestamp': pd.date_range(
        '2026-01-01', periods=n_samples, freq=pd.Timedelta(hours=1)
    ),
    # Each signal is drawn from normal(mean, std).
    'temperature': rng.normal(75, 5, n_samples),
    'pressure': rng.normal(100, 10, n_samples),
    'flow_rate': rng.normal(50, 8, n_samples),
}

df = pd.DataFrame(data)
df.head()

## 2. Data Statistics

Calculate basic statistics for the dataset.

In [None]:
# Compute the summary statistics table, then show it as the cell output
summary_stats = df.describe()
summary_stats

## 3. Data Visualization

Visualize the time series data.

In [None]:
# Draw three stacked panels, one per signal, each plotted against 'timestamp'
fig, axes = plt.subplots(3, 1, figsize=(12, 10))

# (column, line color, y-axis label, panel title), listed top to bottom
panel_specs = [
    ('temperature', 'red', 'Temperature (°F)', 'Temperature Over Time'),
    ('pressure', 'blue', 'Pressure (PSI)', 'Pressure Over Time'),
    ('flow_rate', 'green', 'Flow Rate (GPM)', 'Flow Rate Over Time'),
]

for ax, (column, line_color, y_label, title) in zip(axes, panel_specs):
    ax.plot(df['timestamp'], df[column], color=line_color)
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.grid(True)

# Only the bottom panel gets an x-axis label
axes[-1].set_xlabel('Timestamp')

plt.tight_layout()
plt.show()

## 4. Anomaly Detection

Identify potential anomalies using simple threshold-based detection: a temperature reading is flagged when it lies more than two standard deviations from the mean.

In [None]:
# Acceptance band for temperature: mean ± 2 standard deviations
temp_mean = df['temperature'].mean()
temp_threshold = 2 * df['temperature'].std()

upper_limit = temp_mean + temp_threshold
lower_limit = temp_mean - temp_threshold

# Flag readings strictly outside the band on either side
above_band = df['temperature'] > upper_limit
below_band = df['temperature'] < lower_limit
anomalies = df[above_band | below_band]

print(f"Found {len(anomalies)} anomalies")
anomalies

## 5. Export Results

Save processed data to CSV.

In [None]:
# Export the DataFrame to CSV (without the index column)
output_path = '../data/processed_data.csv'

# DataFrame.to_csv does not create missing parent directories, so make sure
# the target directory exists before writing.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

df.to_csv(output_path, index=False)
print(f"Data exported to {output_path}")