<a href="https://colab.research.google.com/github/Kl2400031949/FDS/blob/main/Untitled2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import random
import time
from datetime import datetime, timedelta, timezone

import pandas as pd

# -------------------------------
# 1. Data Generation (Sensors)
# -------------------------------
def generate_sensor_data(sensor_id):
    """Simulate a single temperature sensor reading.

    Args:
        sensor_id: Identifier stored unchanged in the reading.

    Returns:
        A dict with keys ``sensor_id``, ``temperature_c`` (a random value
        drawn from 20.0-50.0 and rounded to 2 decimals) and ``timestamp``
        (the current UTC time as a naive ``datetime``).
    """
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # "now" and strip the tzinfo so the value stays a naive UTC datetime,
    # exactly what utcnow() used to return.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    return {
        "sensor_id": sensor_id,
        "temperature_c": round(random.uniform(20.0, 50.0), 2),
        "timestamp": now_utc,
    }
# Simulate multiple sensors
sensors = ["MACHINE_01", "MACHINE_02", "MACHINE_03"]

# Collect 10 rounds of readings; every round polls each sensor once
raw_data = []
for _ in range(10):
    raw_data.extend(generate_sensor_data(sensor) for sensor in sensors)
    time.sleep(0.2)  # pause between rounds to mimic the sampling interval

# Load the collected readings into a DataFrame and preview the first rows
df_raw = pd.DataFrame(raw_data)
raw_preview = df_raw.head()
print("1. Raw Sensor Data:")
print(raw_preview, "\n")

# -------------------------------
# 2. Data Validation & Quality Checks
# -------------------------------
def validate_data(df):
    """Filter out readings that fail the basic quality checks.

    A row is kept only when its ``temperature_c`` lies between -20 and 200
    (both ends inclusive) and neither ``sensor_id`` nor ``timestamp`` is
    missing. The input frame is not modified.
    """
    # Plausibility window for temperatures, in Celsius
    in_range = df['temperature_c'].between(-20, 200)
    plausible = df.loc[in_range]
    # Every reading must identify its sensor and carry a timestamp
    return plausible.dropna(subset=['sensor_id', 'timestamp'])

# Run the quality checks on the raw readings and preview the surviving rows
df_valid = validate_data(df_raw)
validated_preview = df_valid.head()
print("2. Validated Data:")
print(validated_preview, "\n")

# -------------------------------
# 3. Data Transformation
# -------------------------------
def transform_data(df):
    """Derive extra columns from validated readings.

    Returns a new frame (the input is left untouched) with three added
    columns, in this order:

    * ``temperature_f``: ``temperature_c`` converted to Fahrenheit.
    * ``location``: production line looked up from ``sensor_id``; NaN for
      sensor ids that are not in the lookup table.
    * ``rolling_avg``: per-sensor trailing mean of ``temperature_c``.
    """
    # Example metadata: which production line each machine sits on
    machine_locations = {
        "MACHINE_01": "Line 1",
        "MACHINE_02": "Line 2",
        "MACHINE_03": "Line 3",
    }

    def _trailing_mean(readings):
        # Mean over up to the last 3 readings, in row order; stands in for
        # a 1-minute rolling average in this simulation.
        return readings.rolling(3, min_periods=1).mean()

    # assign() builds a new frame, so the caller's data is never mutated
    enriched = df.assign(
        temperature_f=df['temperature_c'] * 9 / 5 + 32,
        location=df['sensor_id'].map(machine_locations),
    )
    per_sensor_temps = enriched.groupby('sensor_id')['temperature_c']
    enriched['rolling_avg'] = per_sensor_temps.transform(_trailing_mean)
    return enriched

# Enrich the validated readings and preview the result
df_transformed = transform_data(df_valid)
transformed_preview = df_transformed.head()
print("3. Transformed Data:")
print(transformed_preview, "\n")

# -------------------------------
# 4. Data Storage (Simulated)
# -------------------------------
# In a real scenario, data would go to a database/data warehouse
# Here, we simulate storage as a CSV file
# Persist the transformed readings; the row index is left out of the file
df_transformed.to_csv(
    path_or_buf="processed_sensor_data.csv",
    index=False,
)
print("4. Data stored to processed_sensor_data.csv\n")

# -------------------------------
# 5. Data Serving / Dashboard Ready
# -------------------------------
# For a dashboard, we prepare aggregated metrics
def dashboard_ready(df):
    """Summarise readings per machine for display on a dashboard.

    Returns one row per ``sensor_id`` with the columns ``avg_temp_c`` (mean
    of ``temperature_c``), ``max_temp_c`` (maximum of ``temperature_c``) and
    ``latest_timestamp`` (maximum of ``timestamp``).
    """
    per_machine = df.groupby('sensor_id')
    summary = per_machine.agg(
        avg_temp_c=('temperature_c', 'mean'),
        max_temp_c=('temperature_c', 'max'),
        latest_timestamp=('timestamp', 'max'),
    )
    # Turn the sensor_id index back into an ordinary column
    return summary.reset_index()

# Build the per-machine summary and preview it
df_dashboard = dashboard_ready(df_transformed)
dashboard_preview = df_dashboard.head()
print("5. Dashboard-Ready Data:")
print(dashboard_preview, "\n")

# -------------------------------
# 6. Alerts (Optional)
# -------------------------------
# Identify machines exceeding a threshold
threshold = 90
# Keep only the machines whose hottest reading is strictly above the threshold
over_threshold = df_dashboard['max_temp_c'] > threshold
alerts = df_dashboard[over_threshold]
if alerts.empty:
    print("No alerts. All machines within safe temperature range.")
else:
    print("ALERT: Machines exceeding temperature threshold!")
    print(alerts)



  "timestamp": datetime.utcnow()


1. Raw Sensor Data:
    sensor_id  temperature_c                  timestamp
0  MACHINE_01          37.26 2026-01-02 04:07:43.415212
1  MACHINE_02          44.96 2026-01-02 04:07:43.415232
2  MACHINE_03          34.00 2026-01-02 04:07:43.415238
3  MACHINE_01          46.17 2026-01-02 04:07:43.615458
4  MACHINE_02          45.85 2026-01-02 04:07:43.615475 

2. Validated Data:
    sensor_id  temperature_c                  timestamp
0  MACHINE_01          37.26 2026-01-02 04:07:43.415212
1  MACHINE_02          44.96 2026-01-02 04:07:43.415232
2  MACHINE_03          34.00 2026-01-02 04:07:43.415238
3  MACHINE_01          46.17 2026-01-02 04:07:43.615458
4  MACHINE_02          45.85 2026-01-02 04:07:43.615475 

3. Transformed Data:
    sensor_id  temperature_c                  timestamp  temperature_f  \
0  MACHINE_01          37.26 2026-01-02 04:07:43.415212         99.068   
1  MACHINE_02          44.96 2026-01-02 04:07:43.415232        112.928   
2  MACHINE_03          34.00 2026-01-02 04