In [1]:
!pip install -q --upgrade google-generativeai
!pip install faiss-cpu

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/155.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m153.6/155.4 kB[0m [31m7.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m155.4/155.4 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.2/13.2 MB[0m [31m108.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m47.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


In [2]:
import google.generativeai as genai

try:

  from google.colab import userdata
  api_key = userdata.get('GEMINI_API_KEY')
  genai.configure(api_key=api_key)

except Exception as e:
    print("❌ Gemini API setup failed:", str(e))


In [3]:
import pandas as pd

try:
    url = "https://raw.githubusercontent.com/incribo-inc/cybersecurity_attacks/37f147e47d8772ae7fdd6713b25ea33395e0ddc3/cybersecurity_attacks.csv"
    df = pd.read_csv(url)
    print("✅ Dataset loaded from GitHub. Shape:", df.shape)
except Exception as e:
    print("❌ Error loading dataset from GitHub:", str(e))


✅ Dataset loaded from GitHub. Shape: (40000, 25)


In [4]:
prompt_template = """
You are a cybersecurity SOC AI Assistant. Given a detailed network log, analyze the event and return:

1. A clear summary of what happened.
2. The MITRE ATT&CK technique ID and name (if applicable).
3. Recommended response actions for the SOC analyst.

Example:

Log:
Timestamp: 2023-05-30 06:33:58
Source IP Address: 103.216.15.12
Destination IP Address: 84.9.164.252
Protocol: ICMP
Packet Type: Data
Traffic Type: HTTP
Payload Data: Qui natus odio asperiores nam. Optio nobis iusto eos.
Severity Level: Low
User: Reyansh Dugal
Action Taken: Logged

Summary: On May 30, 2023, a low-severity ICMP data packet was logged from source 103.216.15.12 targeting 84.9.164.252 over HTTP. The payload did not indicate clear malicious intent, and the event was logged for reference.

MITRE: None applicable (no suspicious behavior detected)

Actions: Monitor for repeat occurrences, correlate with other logs if similar patterns reappear.

---

Log:
{log}
Summary:
"""


In [5]:
def summarize_log(log_text):
    try:
        prompt = prompt_template.format(log=log_text)
        model = genai.GenerativeModel("models/gemini-pro")
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        return f"❌ Error during summarization: {str(e)}"


In [6]:
!pip install faiss-cpu



In [7]:
import faiss

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

In [9]:
try:
    # Convert DataFrame rows into text blocks
    logs_as_text = df.astype(str).apply(lambda row: "\n".join(f"{col}: {val}" for col, val in row.items()), axis=1).tolist()

    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(logs_as_text).toarray().astype("float32")

    index = faiss.IndexFlatL2(X.shape[1])
    index.add(X)

    def search_similar_logs(query, top_k=2):
        try:
            q_vec = vectorizer.transform([query]).toarray().astype("float32")
            _, idxs = index.search(q_vec, top_k)
            return [logs_as_text[i] for i in idxs[0]]
        except Exception as e:
            return [f"❌ Error during similarity search: {str(e)}"]

    print("✅ Vector search engine initialized!")

except Exception as e:
    print("❌ Vector engine setup failed:", str(e))


✅ Vector search engine initialized!


In [10]:
def agent_analysis_with_context(log_text):
    try:
        context_logs = search_similar_logs(log_text)
        context = "\n\n---\n\n".join(context_logs)

        agent_prompt = f"""
You are an advanced SOC AI Agent. Analyze the following log with the help of historical context.

Log:
{log_text}

Relevant Past Logs:
{context}

Explain what happened, suggest MITRE ATT&CK techniques, and recommend response actions.
"""
        model = genai.GenerativeModel("models/gemini-pro")
        response = model.generate_content(agent_prompt)
        return response.text
    except Exception as e:
        return f"❌ Error during agent analysis: {str(e)}"


In [11]:
try:
    first_log = df.iloc[0].to_string()
    print("🔍 Raw Log:\n", first_log)

    summary = summarize_log(first_log)
    print("\n📝 Summary Output:\n", summary)

    reasoning = agent_analysis_with_context(first_log)
    print("\n🧠 Agent Reasoning with Context:\n", reasoning)

except Exception as e:
    print("❌ Error during processing:", str(e))


🔍 Raw Log:
 Timestamp                                               2023-05-30 06:33:58
Source IP Address                                             103.216.15.12
Destination IP Address                                         84.9.164.252
Source Port                                                           31225
Destination Port                                                      17616
Protocol                                                               ICMP
Packet Length                                                           503
Packet Type                                                            Data
Traffic Type                                                           HTTP
Payload Data              Qui natus odio asperiores nam. Optio nobis ius...
Malware Indicators                                             IoC Detected
Anomaly Scores                                                        28.67
Attack Type                                                         Malware





📝 Summary Output:
 ❌ Error during summarization: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-pro is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.

🧠 Agent Reasoning with Context:
 ❌ Error during agent analysis: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-pro is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.


