In [1]:
# ==========================================
# 01_generate_and_train.ipynb
# ==========================================

import os
import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Lay out the local folder tree that stands in for the S3 prefixes used below.
# exist_ok=True makes this step safe to re-run on an existing tree.
for _s3_prefix in ("raw", "training", "inference_input"):
    os.makedirs(f"local_s3/{_s3_prefix}", exist_ok=True)

# Build a synthetic IoT dataset of normally distributed sensor readings.
# The legacy global NumPy RNG is seeded so every run yields the same values.
num_samples = 1000
np.random.seed(42)

# (mean, standard deviation) per sensor channel. Columns are drawn in dict
# order (temperature, vibration, sound_level), which fixes how the random
# stream is consumed and therefore the exact values of each column.
_sensor_params = {
    "temperature": (60, 10),
    "vibration": (5, 2),
    "sound_level": (50, 5),
}
data = pd.DataFrame({
    channel: np.random.normal(mean, std, num_samples)
    for channel, (mean, std) in _sensor_params.items()
})

# Labelling rule: a reading is abnormal (1) when temperature exceeds 75
# or vibration exceeds 8; otherwise it is normal (0).
_over_temperature = data["temperature"].gt(75)
_over_vibration = data["vibration"].gt(8)
data["abnormal"] = (_over_temperature | _over_vibration).astype(int)

# Persist the full synthetic dataset under the "raw" prefix (simulated S3 upload).
_raw_path = "local_s3/raw/sensor_data.csv"
data.to_csv(_raw_path, index=False)
print(f"Synthetic IoT data saved to {_raw_path}")

# Hold out 20% of the rows; the fixed random_state keeps the partition
# identical across runs.
train_df, test_df = train_test_split(data, random_state=42, test_size=0.2)

# Training rows go to the training prefix, held-out rows to the
# inference-input prefix. Both files keep the "abnormal" label column.
for _frame, _csv_path in (
    (train_df, "local_s3/training/train.csv"),
    (test_df, "local_s3/inference_input/test.csv"),
):
    _frame.to_csv(_csv_path, index=False)
print("Train and test data saved")

# Fit the classifier locally (stand-in for a SageMaker Training Job).
_feature_cols = ["temperature", "vibration", "sound_level"]
X_train = train_df[_feature_cols]
y_train = train_df["abnormal"]

# 100 trees with a fixed random_state so the fitted forest is reproducible.
# fit() returns the estimator itself, so the call can be chained.
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Serialize the fitted model into the training prefix as the "model artifact".
_model_path = "local_s3/training/model.pkl"
joblib.dump(model, _model_path)
print(f"Model trained and saved to {_model_path}")

# Evaluate model performance on the held-out split.
# NOTE: the "abnormal" label is a deterministic threshold rule on temperature
# and vibration (defined where the data is generated), so near-perfect scores
# are expected and do not indicate real-world predictive quality.
X_test = test_df.loc[:, ["temperature", "vibration", "sound_level"]]
y_test = test_df["abnormal"]
preds = model.predict(X_test)

# Blank line, heading, then the per-class precision / recall / F1 table.
print("\nModel Evaluation:", classification_report(y_test, preds), sep="\n")


Synthetic IoT data saved to local_s3/raw/sensor_data.csv
Train and test data saved
Model trained and saved to local_s3/training/model.pkl

Model Evaluation:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       176
           1       1.00      1.00      1.00        24

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200



In [2]:
# ==========================================
# 02_batch_inference.ipynb
# ==========================================

import os
import json
import joblib
import pandas as pd
from datetime import datetime
import numpy as np

# Create the output folders that simulate the S3 inference-output prefix and
# the DynamoDB table; exist_ok=True keeps the step re-runnable.
for _sim_dir in ("local_s3/inference_output", "local_dynamodb"):
    os.makedirs(_sim_dir, exist_ok=True)

# Load the trained model artifact.
# SECURITY NOTE: joblib.load unpickles the file, which can execute arbitrary
# code; only load artifacts produced by a trusted training run.
_model_path = "local_s3/training/model.pkl"
model = joblib.load(_model_path)
print(f"Loaded model from {_model_path}")

# Read the sensor batch to score and select the three feature columns the
# model is given at prediction time.
_feature_cols = ["temperature", "vibration", "sound_level"]
test_df = pd.read_csv("local_s3/inference_input/test.csv")
X_test = test_df[_feature_cols]

# Run batch inference: hard class predictions for every row.
preds = model.predict(X_test)

# predict_proba returns one column per entry of model.classes_, in that order.
# Look up the column of the positive label (1) instead of assuming it sits at
# index 1: a model artifact fitted on single-class data exposes only one
# column, and a fixed [:, 1] would raise IndexError.
_class_labels = list(model.classes_)
if 1 in _class_labels:
    probs = model.predict_proba(X_test)[:, _class_labels.index(1)]
else:
    # The model never saw the positive class, so it assigns it zero probability.
    probs = np.zeros(len(X_test))

# Combine results: keep every input column and append the two model outputs.
results = test_df.copy()
results["predicted_abnormal"] = preds
results["abnormal_probability"] = probs

# Save batch results to S3 (simulated).
results.to_csv("local_s3/inference_output/batch_results.csv", index=False)
print("Inference results saved to local_s3/inference_output/batch_results.csv")

# Simulate DynamoDB insert (as JSON): one item per scored row.
#
# Device IDs are synthetic. They are drawn from a dedicated, seeded generator
# so this notebook produces the same IDs on every fresh-kernel run, instead of
# depending on whatever state the global NumPy RNG happens to be in.
# The range matches the original randint(1, 10): integers 1..9, upper bound
# exclusive.
_device_rng = np.random.default_rng(42)
_device_numbers = _device_rng.integers(1, 10, size=len(results))

# itertuples keeps each column's own dtype (iterrows would upcast the whole
# row to float) and is considerably faster. Values are converted to plain
# Python int/float because json cannot serialize NumPy scalars.
# NOTE(review): timestamps are naive local wall-clock time captured per record;
# confirm whether consumers expect UTC.
records = [
    {
        "device_id": f"machine_{int(device_number)}",
        "timestamp": datetime.now().isoformat(),
        "temperature": float(row.temperature),
        "vibration": float(row.vibration),
        "sound_level": float(row.sound_level),
        "predicted_abnormal": int(row.predicted_abnormal),
        "abnormal_probability": float(row.abnormal_probability),
    }
    for device_number, row in zip(_device_numbers, results.itertuples(index=False))
]

# Explicit encoding so the file is written identically on every platform.
with open("local_dynamodb/inference_results.json", "w", encoding="utf-8") as f:
    json.dump(records, f, indent=2)

print("Results saved to local_dynamodb/inference_results.json")


Loaded model from local_s3/training/model.pkl
Inference results saved to local_s3/inference_output/batch_results.csv
Results saved to local_dynamodb/inference_results.json
