In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively list every file mounted under the Kaggle input directory,
# printing one full path per line.
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    full_paths = (os.path.join(folder, name) for name in file_names)
    for path in full_paths:
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## 👩‍🔬 About the Researcher

My name is Sahar Zargarzadeh, a PhD researcher working in the field of **Adversarial Machine Learning and Power-Aware IoT Security**. My research focuses on how power consumption data in embedded and IoT systems can reveal patterns — and vulnerabilities — that adversaries can exploit or conceal. Specifically, I investigate how adversarial behavior affects power signals and how AI/ML models can detect or be misled by such patterns.

## 💬 Why This Chatbot?

As part of this 5-day Generative AI Capstone project, I developed an **explainable AI assistant** to interpret and explain anomalies in IoT power signals. This chatbot was built using **Retrieval-Augmented Generation (RAG)** and **Gemini 1.5 Pro**, and allows users to ask questions about unusual power behavior (like sudden drops or high-frequency noise) in natural language.

This directly supports my research goals — making AI-powered anomaly detection not only accurate but also understandable and transparent to users, especially in real-world cybersecurity applications.


# An Explainable AI Assistant for IoT Power Anomaly Detection using Gemini and RAG

**In this capstone project for the 5-Day Generative AI Intensive Course with Google, we propose an explainable AI assistant capable of analyzing and interpreting IoT power signals for anomalies.**

* *The assistant is built using Gemini 1.5 Pro and leverages Retrieval-Augmented Generation (RAG) to ground its responses in relevant context. The goal is to not only detect anomalies but to explain them in human-understandable language, making the solution useful for technicians, engineers, and non-technical stakeholders alike.*

* *We apply this solution to simulated power signals, a domain relevant to smart homes, industrial sensors, and IoT device monitoring. Anomalies such as spikes, drops, and noise often indicate system faults, cyberattacks, or device misbehavior. Traditional models flag them—but this assistant explains them.*

**GenAI Capabilities Demonstrated**

> This notebook showcases three core GenAI capabilities, which are then combined into a conversational assistant (Capability 4):

1. Few-shot Prompting – teaching Gemini to classify and explain different power signal patterns by example.

2. Embedding + Similarity Search – transforming signals into vector space to find semantically similar examples.

3. Grounding with RAG – augmenting model responses with domain-specific documentation for trustworthy, verifiable answers.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random
import time
# Entry points of the `google.generativeai` package (client configuration and
# model handle).
from google.generativeai import configure, GenerativeModel

# NOTE(review): this `genai` is imported from the `google` namespace, a
# different import path than `google.generativeai` above. A later cell runs
# `import google.generativeai as genai`, which rebinds the same name, so every
# `genai.*` call after that point goes to `google.generativeai` instead.
from google import genai
from google.genai import types

from IPython.display import Markdown, display

# Shows the version of the `genai` module bound at this point (the one imported
# from `google` above), not of the module later cells end up calling.
genai.__version__


In [None]:
import os

# Print a heading followed by the directory listing, first for the Kaggle
# input root and then for the power-signal dataset folder.
for heading, folder in (
    ("Available folders in /kaggle/input:", '/kaggle/input'),
    ("\nFiles in your dataset:", '/kaggle/input/iot-power-signals-csv'),
):
    print(heading)
    print(os.listdir(folder))


In [None]:
# Load the IoT power-signal recordings from the attached Kaggle dataset.
df = pd.read_csv("/kaggle/input/iot-power-signals-csv/IoT_Power_Signal_Dataset.csv")

# Fail fast with a readable message if the columns the plotting cell reads
# ('task', 'timestamp', 'power') are absent, instead of a bare KeyError later.
_missing_columns = {"task", "timestamp", "power"} - set(df.columns)
assert not _missing_columns, f"Dataset is missing expected columns: {sorted(_missing_columns)}"

df.head()


In [None]:
# One line per task label, drawn on a shared axis so the power profiles of the
# different tasks can be compared directly.
fig, ax = plt.subplots(figsize=(12, 5))
for label in df['task'].unique():
    task_rows = df.loc[df['task'] == label]
    ax.plot(task_rows['timestamp'], task_rows['power'], label=label)
ax.legend()
ax.set_title("Power Consumption Over Time by Task")
ax.set_xlabel("Timestamp")
ax.set_ylabel("Power (mW)")
plt.show()


In [None]:
from kaggle_secrets import UserSecretsClient
# NOTE: this rebinds `genai`, which an earlier cell imported via
# `from google import genai`. From here on `genai` is `google.generativeai`.
import google.generativeai as genai

# Securely fetch API key
# (read from the Kaggle secret named "GOOGLE_API_KEY" rather than written in the notebook)
api_key = UserSecretsClient().get_secret("GOOGLE_API_KEY")

# Configure the Gemini client
genai.configure(api_key=api_key)

# Test it
# `model` is the handle the few-shot prompting cell calls `generate_content` on.
model = genai.GenerativeModel("gemini-1.5-pro")

# Smoke test: send a single prompt and print the text of the reply.
response = model.generate_content("Hello Gemini, what can you do?")
print(response.text)


# **GenAI Capability 1: Few-shot Prompting**

*Let’s use the Gemini API to interpret patterns in power behavior using a prompt.*

In [None]:
# Sample window of power readings the model is asked to interpret.
power_readings = [153.0, 151.5, 152.2, 189.4, 192.0, 195.3, 150.2]

# The task descriptions act as the reference patterns; the readings are
# interpolated so the data stays separate from the instruction text.
prompt = f"""
You are analyzing power data from an IoT device. Based on prior examples:
- 'blink' task = stable sinusoidal power pattern
- 'dosTask' = erratic spikes and sustained high usage
Use the following power segment to interpret the behavior.

Power readings:
{power_readings}

Explain what likely caused this behavior.
"""

response = model.generate_content(prompt)
print(response.text)

# **GenAI Capability 2: Embedding + Similarity**

*We'll use Gemini embeddings to compare a new segment to labeled historical examples.*

In [None]:
import google.generativeai as genai

# Embed one free-text anomaly description; its vector is compared against the
# labelled example embeddings in the following cells.
response = genai.embed_content(
    content="Sudden drop in power with no recovery",
    model="models/embedding-001",
    task_type="retrieval_document",  # "semantic_similarity" is another option to try
)

test_embedding = response['embedding']
print("Length of embedding:", len(test_embedding))


In [None]:
# Labelled reference descriptions of typical power-signal behaviours.
example_texts = [
    "Regular IoT power usage pattern",
    "Short sudden power spike with recovery",
    "Consistent high-frequency power noise",
    "Power drop followed by irregular signal"
]

# Embed all example texts with one batched request instead of one API round
# trip per text: when `content` is a list, the "embedding" entry of the result
# is a list holding one vector per input, in input order.
example_embeddings = genai.embed_content(
    model="models/embedding-001",
    content=example_texts,
    task_type="retrieval_document",
)["embedding"]


In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Score the single test embedding against every example embedding; the result
# has one row (the test vector) and one column per example.
similarities = cosine_similarity([test_embedding], example_embeddings)

# Report each example's text next to its similarity to the test sentence.
for position, (text, score) in enumerate(zip(example_texts, similarities[0]), start=1):
    print(f"Similarity to Example {position} ('{text}'): {score:.4f}")


# **GenAI Capability 3: Grounding with Simulated Docs**

*We simulate grounding by retrieving the most relevant snippet from a small set of reference documents and adding it to the prompt as context.*

In [None]:
# Miniature knowledge base for the grounding demo: document id -> short
# explanation of one kind of power-signal behaviour.
support_docs = dict(
    doc1="A sudden drop in power followed by erratic patterns may indicate an IoT device reboot or firmware update in progress.",
    doc2="High-frequency noise in power signals could be caused by switching regulators or nearby electromagnetic interference sources.",
    doc3="Power spikes are often due to sensor initialization or communication surges during device boot.",
    doc4="A normal IoT power pattern is typically consistent, with small fluctuations during data transmission intervals.",
)


In [None]:
# Embed every support document with one batched request instead of one API
# round trip per document: when `content` is a list, the "embedding" entry of
# the result is a list holding one vector per input, in input order.
response = genai.embed_content(
    model="models/embedding-001",
    content=list(support_docs.values()),
    task_type="retrieval_document"
)

# Pair each document id with its vector. Dicts preserve insertion order, so
# the ids line up with the values that were sent above.
doc_embeddings = dict(zip(support_docs, response["embedding"]))



In [None]:
query = "Why does my power signal have sudden drop and noise afterward?"

# Embed the question with the "retrieval_query" task type so it can be scored
# against the stored document embeddings.
query_response = genai.embed_content(
    content=query,
    model="models/embedding-001",
    task_type="retrieval_query",
)
query_embedding = query_response["embedding"]



In [None]:
import numpy as np

def cosine_similarity(a, b):
    """Return the cosine similarity between two 1-D vectors.

    NOTE: this definition reuses the name ``cosine_similarity`` that an earlier
    cell imported from scikit-learn. Once this cell has run, the name refers to
    this two-argument helper until the scikit-learn function is imported again.

    Args:
        a: Sequence or array of numbers.
        b: Sequence or array of numbers with the same length as ``a``.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))``, or ``0.0`` when either vector has
        zero norm and the ratio is therefore undefined.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # 0/0 would yield NaN (plus a RuntimeWarning); NaN scores compare as
        # neither larger nor smaller, which makes any ranking unreliable.
        return 0.0
    return np.dot(a, b) / denominator

# Score every support document against the query embedding.
similarities = {}
for doc_id, emb in doc_embeddings.items():
    similarities[doc_id] = cosine_similarity(query_embedding, emb)

# The best match is the document id carrying the highest score.
most_relevant_doc = max(similarities, key=lambda doc_id: similarities[doc_id])
print("Most relevant document:", support_docs[most_relevant_doc])


In [None]:
from google.generativeai import GenerativeModel

# Define the Gemini Pro model
# (`gemini_pro` is the handle the grounded-answer cell and the chatbot call
# `generate_content` on)
gemini_pro = GenerativeModel("gemini-1.5-pro")


In [None]:
# Ground the answer in the best-matching support document: the user question
# and the retrieved text are both placed in the prompt.
context = support_docs[most_relevant_doc]
prompt = (
    "You are a smart IoT power monitoring assistant. A user asked:\n"
    f"'{query}'\n"
    "\n"
    "Based on this reference document:\n"
    f'"""{context}"""\n'
    "\n"
    "Give your answer in simple terms."
)

response = gemini_pro.generate_content(prompt)
print(response.text)


# **GenAI Capability 4: Conversational Assistant for IoT Anomaly Explanations**

In [None]:
# Re-import restores scikit-learn's `cosine_similarity` under that name; an
# earlier cell rebinds it to a two-argument helper. The function below does not
# rely on either binding, so it works regardless of cell execution order.
from sklearn.metrics.pairwise import cosine_similarity

def get_most_similar_doc(user_input, doc_embeddings, embed_model=None):
    """Return the support document whose embedding is closest to the question.

    Reads the notebook globals ``genai`` (embedding client), ``np`` and
    ``support_docs`` (document id -> text).

    Args:
        user_input: The user's question as plain text.
        doc_embeddings: Mapping of document id -> embedding vector. Every id
            must also be a key of ``support_docs``.
        embed_model: Optional embedding model name. When it is not a string
            (including the default ``None``), "models/embedding-001" is used.

    Returns:
        The text of the most similar document, taken from ``support_docs``.
        On ties the first document in ``doc_embeddings`` order wins.

    Raises:
        ValueError: If ``doc_embeddings`` is empty.
    """
    if not doc_embeddings:
        # Checked before the API call so an empty index costs no request.
        raise ValueError("doc_embeddings is empty; embed the support documents first")

    model_name = embed_model if isinstance(embed_model, str) else "models/embedding-001"

    # Embed the user question
    response = genai.embed_content(
        model=model_name,
        content=user_input,
        task_type="retrieval_query"
    )
    query_vector = np.asarray(response['embedding'], dtype=float)

    # Score all documents at once: one row per document.
    doc_keys = list(doc_embeddings)
    doc_matrix = np.asarray([doc_embeddings[key] for key in doc_keys], dtype=float)
    dot_products = doc_matrix @ query_vector
    norm_products = np.linalg.norm(doc_matrix, axis=1) * np.linalg.norm(query_vector)
    # Zero-norm vectors get a score of 0 instead of a 0/0 NaN.
    scores = np.divide(
        dot_products,
        norm_products,
        out=np.zeros_like(dot_products),
        where=norm_products > 0,
    )

    # Return the most similar doc
    most_similar_key = doc_keys[int(np.argmax(scores))]
    return support_docs[most_similar_key]


In [None]:
# Re-creates the same "gemini-1.5-pro" handle that an earlier cell already
# bound to `gemini_pro` (presumably so this section can be run on its own).
gemini_pro = GenerativeModel("gemini-1.5-pro")


In [None]:
def chat_with_ai():
    """Run an interactive question/answer loop about IoT power anomalies.

    For every question typed by the user, the most similar support document is
    looked up with ``get_most_similar_doc`` and handed to Gemini as context.
    The loop ends when the user types 'exit' (case-insensitive, surrounding
    whitespace ignored) or when the input stream ends or is interrupted.
    Blank lines are skipped without calling the API.

    Reads the notebook globals ``get_most_similar_doc``, ``doc_embeddings``
    and ``gemini_pro``. Returns nothing; all output is printed.
    """
    # get_most_similar_doc takes the embedding model as its third argument.
    # The model name is spelled out here instead of relying on a global
    # `embed_model` variable, which the notebook never defines.
    embedding_model_name = "models/embedding-001"

    print("💬 AI IoT Chatbot is ready. Type 'exit' to quit.")
    while True:
        try:
            user_input = input("🧑 You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # No more input (or the user interrupted): leave like 'exit'.
            print("\n👋 Goodbye!")
            break

        if user_input.lower() == "exit":
            print("👋 Goodbye!")
            break
        if not user_input:
            # Nothing to ask; prompt again rather than embedding an empty string.
            continue

        context_doc = get_most_similar_doc(user_input, doc_embeddings, embedding_model_name)

        prompt = f"""You are an IoT power signal expert.
Use the following document to help answer the question:

Context:\n{context_doc}

Question: {user_input}
Answer:"""

        response = gemini_pro.generate_content(prompt)
        print("🤖 Gemini:", response.text.strip())


In [None]:
#chat_with_ai()


# **Conclusion**


*This project demonstrates how Generative AI can be used not just for automation, but for understanding. By combining Gemini’s reasoning capabilities with semantic embeddings and grounding via RAG, we’ve built an assistant that bridges the gap between anomaly detection and anomaly explanation. Such assistants could be deployed in smart grid monitoring, IoT security, or predictive maintenance, empowering users to act on insights with confidence.*
