In [14]:
!pip install -q --upgrade google-generativeai
!pip install faiss-cpu



Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m47.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


In [3]:
import google.generativeai as genai
from kaggle_secrets import UserSecretsClient

try:
    api_key = UserSecretsClient().get_secret("GEMINI_API_KEY")
    genai.configure(api_key=api_key)

    test_model = genai.GenerativeModel("models/gemini-pro")
    test_response = test_model.generate_content("Test prompt: Is Gemini Pro ready?")
    print("✅ Gemini API is working!")

except Exception as e:
    print("❌ Gemini API setup failed:", str(e))


❌ Gemini API setup failed: 404 models/gemini-pro is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.


In [8]:
import pandas as pd

try:
    df = pd.read_csv("/kaggle/input/cybersecurity-attacks1-csv/cybersecurity_attacks.csv")
    print("✅ Dataset loaded. Shape:", df.shape)
except Exception as e:
    print("❌ Error loading dataset:", str(e))


✅ Dataset loaded. Shape: (40000, 25)


In [9]:
prompt_template = """
You are a cybersecurity SOC AI Assistant. Given a detailed network log, analyze the event and return:

1. A clear summary of what happened.
2. The MITRE ATT&CK technique ID and name (if applicable).
3. Recommended response actions for the SOC analyst.

Example:

Log:
Timestamp: 2023-05-30 06:33:58  
Source IP Address: 103.216.15.12  
Destination IP Address: 84.9.164.252  
Protocol: ICMP  
Packet Type: Data  
Traffic Type: HTTP  
Payload Data: Qui natus odio asperiores nam. Optio nobis iusto eos.  
Severity Level: Low  
User: Reyansh Dugal  
Action Taken: Logged

Summary: On May 30, 2023, a low-severity ICMP data packet was logged from source 103.216.15.12 targeting 84.9.164.252 over HTTP. The payload did not indicate clear malicious intent, and the event was logged for reference.

MITRE: None applicable (no suspicious behavior detected)

Actions: Monitor for repeat occurrences, correlate with other logs if similar patterns reappear.

---

Log:
{log}
Summary:
"""


In [10]:
def summarize_log(log_text):
    try:
        prompt = prompt_template.format(log=log_text)
        model = genai.GenerativeModel("models/gemini-pro")
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        return f"❌ Error during summarization: {str(e)}"


In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer
import faiss
import numpy as np

try:
    # Convert DataFrame rows into text blocks
    logs_as_text = df.astype(str).apply(lambda row: "\n".join(f"{col}: {val}" for col, val in row.items()), axis=1).tolist()

    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(logs_as_text).toarray().astype("float32")

    index = faiss.IndexFlatL2(X.shape[1])
    index.add(X)

    def search_similar_logs(query, top_k=2):
        try:
            q_vec = vectorizer.transform([query]).toarray().astype("float32")
            _, idxs = index.search(q_vec, top_k)
            return [logs_as_text[i] for i in idxs[0]]
        except Exception as e:
            return [f"❌ Error during similarity search: {str(e)}"]

    print("✅ Vector search engine initialized!")

except Exception as e:
    print("❌ Vector engine setup failed:", str(e))


✅ Vector search engine initialized!


In [16]:
def agent_analysis_with_context(log_text):
    try:
        context_logs = search_similar_logs(log_text)
        context = "\n\n---\n\n".join(context_logs)

        agent_prompt = f"""
You are an advanced SOC AI Agent. Analyze the following log with the help of historical context.

Log:
{log_text}

Relevant Past Logs:
{context}

Explain what happened, suggest MITRE ATT&CK techniques, and recommend response actions.
"""
        model = genai.GenerativeModel("models/gemini-pro")
        response = model.generate_content(agent_prompt)
        return response.text
    except Exception as e:
        return f"❌ Error during agent analysis: {str(e)}"


In [17]:
try:
    first_log = df.iloc[0].to_string()
    print("🔍 Raw Log:\n", first_log)

    summary = summarize_log(first_log)
    print("\n📝 Summary Output:\n", summary)

    reasoning = agent_analysis_with_context(first_log)
    print("\n🧠 Agent Reasoning with Context:\n", reasoning)

except Exception as e:
    print("❌ Error during processing:", str(e))


🔍 Raw Log:
 Timestamp                                               2023-05-30 06:33:58
Source IP Address                                             103.216.15.12
Destination IP Address                                         84.9.164.252
Source Port                                                           31225
Destination Port                                                      17616
Protocol                                                               ICMP
Packet Length                                                           503
Packet Type                                                            Data
Traffic Type                                                           HTTP
Payload Data              Qui natus odio asperiores nam. Optio nobis ius...
Malware Indicators                                             IoC Detected
Anomaly Scores                                                        28.67
Attack Type                                                         Malware
