You can also github google collab version of notebook in https://github.com/AI-Cybersec/AI-Agents-Cybersecurity/blob/01d370411370d00a378682bf3b8fe227a7701ad7/SOC%20Assistant%20AI%20Agent%20Github/SOC_AI_Assistant%20gemini%20api%20pro.ipynb

In [1]:
!pip install -q --upgrade google-generativeai
!pip install faiss-cpu

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m155.4/155.4 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m45.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


In [2]:
import google.generativeai as genai
import pandas as pd
import faiss
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np


In [4]:
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    secret_value_0 = user_secrets.get_secret("GEMINI_API_KEY")

except Exception as e:
    print("❌ Gemini API setup failed:", str(e))

In [5]:
try:
    url = "https://raw.githubusercontent.com/incribo-inc/cybersecurity_attacks/37f147e47d8772ae7fdd6713b25ea33395e0ddc3/cybersecurity_attacks.csv"
    df = pd.read_csv(url)
    print("✅ Dataset loaded from GitHub. Shape:", df.shape)
except Exception as e:
    print("❌ Error loading dataset from GitHub:", str(e))

✅ Dataset loaded from GitHub. Shape: (40000, 25)


In [6]:
df.head()

Unnamed: 0,Timestamp,Source IP Address,Destination IP Address,Source Port,Destination Port,Protocol,Packet Length,Packet Type,Traffic Type,Payload Data,...,Action Taken,Severity Level,User Information,Device Information,Network Segment,Geo-location Data,Proxy Information,Firewall Logs,IDS/IPS Alerts,Log Source
0,2023-05-30 06:33:58,103.216.15.12,84.9.164.252,31225,17616,ICMP,503,Data,HTTP,Qui natus odio asperiores nam. Optio nobis ius...,...,Logged,Low,Reyansh Dugal,Mozilla/5.0 (compatible; MSIE 8.0; Windows NT ...,Segment A,"Jamshedpur, Sikkim",150.9.97.135,Log Data,,Server
1,2020-08-26 07:08:30,78.199.217.198,66.191.137.154,17245,48166,ICMP,1174,Data,HTTP,Aperiam quos modi officiis veritatis rem. Omni...,...,Blocked,Low,Sumer Rana,Mozilla/5.0 (compatible; MSIE 8.0; Windows NT ...,Segment B,"Bilaspur, Nagaland",,Log Data,,Firewall
2,2022-11-13 08:23:25,63.79.210.48,198.219.82.17,16811,53600,UDP,306,Control,HTTP,Perferendis sapiente vitae soluta. Hic delectu...,...,Ignored,Low,Himmat Karpe,Mozilla/5.0 (compatible; MSIE 9.0; Windows NT ...,Segment C,"Bokaro, Rajasthan",114.133.48.179,Log Data,Alert Data,Firewall
3,2023-07-02 10:38:46,163.42.196.10,101.228.192.255,20018,32534,UDP,385,Data,HTTP,Totam maxime beatae expedita explicabo porro l...,...,Blocked,Medium,Fateh Kibe,Mozilla/5.0 (Macintosh; PPC Mac OS X 10_11_5; ...,Segment B,"Jaunpur, Rajasthan",,,Alert Data,Firewall
4,2023-07-16 13:11:07,71.166.185.76,189.243.174.238,6131,26646,TCP,1462,Data,DNS,Odit nesciunt dolorem nisi iste iusto. Animi v...,...,Blocked,Low,Dhanush Chad,Mozilla/5.0 (compatible; MSIE 5.0; Windows NT ...,Segment C,"Anantapur, Tripura",149.6.110.119,,Alert Data,Firewall


In [7]:
prompt_template = """
You are a cybersecurity SOC AI Assistant. Given a detailed network log, analyze the event and return:

1. A clear summary of what happened.
2. The MITRE ATT&CK technique ID and name (if applicable).
3. Recommended response actions for the SOC analyst.

Example:

Log:
Timestamp: 2023-05-30 06:33:58
Source IP Address: 103.216.15.12
Destination IP Address: 84.9.164.252
Protocol: ICMP
Packet Type: Data
Traffic Type: HTTP
Payload Data: Qui natus odio asperiores nam. Optio nobis iusto eos.
Severity Level: Low
User: Reyansh Dugal
Action Taken: Logged

Summary: On May 30, 2023, a low-severity ICMP data packet was logged from source 103.216.15.12 targeting 84.9.164.252 over HTTP. The payload did not indicate clear malicious intent, and the event was logged for reference.

MITRE: None applicable (no suspicious behavior detected)

Actions: Monitor for repeat occurrences, correlate with other logs if similar patterns reappear.

---

Log:
{log}
Summary:
"""

In [8]:
def summarize_log(log_text):
    try:
        prompt = prompt_template.format(log=log_text)
        model = genai.GenerativeModel("models/gemini-pro")
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        return f"❌ Error during summarization: {str(e)}"

In [9]:
try:
    # Convert DataFrame rows into text blocks
    logs_as_text = df.astype(str).apply(lambda row: "\n".join(f"{col}: {val}" for col, val in row.items()), axis=1).tolist()

    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(logs_as_text).toarray().astype("float32")

    index = faiss.IndexFlatL2(X.shape[1])
    index.add(X)

    def search_similar_logs(query, top_k=2):
        try:
            q_vec = vectorizer.transform([query]).toarray().astype("float32")
            _, idxs = index.search(q_vec, top_k)
            return [logs_as_text[i] for i in idxs[0]]
        except Exception as e:
            return [f"❌ Error during similarity search: {str(e)}"]

    print("✅ Vector search engine initialized!")

except Exception as e:
    print("❌ Vector engine setup failed:", str(e))

✅ Vector search engine initialized!


In [10]:
def agent_analysis_with_context(log_text):
    try:
        context_logs = search_similar_logs(log_text)
        context = "\n\n---\n\n".join(context_logs)

        agent_prompt = f"""
You are an advanced SOC AI Agent. Analyze the following log with the help of historical context.

Log:
{log_text}

Relevant Past Logs:
{context}

Explain what happened, suggest MITRE ATT&CK techniques, and recommend response actions.
"""
        model = genai.GenerativeModel("models/gemini-pro")
        response = model.generate_content(agent_prompt)
        return response.text
    except Exception as e:
        return f"❌ Error during agent analysis: {str(e)}"

In [11]:
try:
    first_log = df.iloc[0].to_string()
    print("🔍 Raw Log:\n", first_log)

    summary = summarize_log(first_log)
    print("\n📝 Summary Output:\n", summary)

    reasoning = agent_analysis_with_context(first_log)
    print("\n🧠 Agent Reasoning with Context:\n", reasoning)

except Exception as e:
    print("❌ Error during processing:", str(e))

🔍 Raw Log:
 Timestamp                                               2023-05-30 06:33:58
Source IP Address                                             103.216.15.12
Destination IP Address                                         84.9.164.252
Source Port                                                           31225
Destination Port                                                      17616
Protocol                                                               ICMP
Packet Length                                                           503
Packet Type                                                            Data
Traffic Type                                                           HTTP
Payload Data              Qui natus odio asperiores nam. Optio nobis ius...
Malware Indicators                                             IoC Detected
Anomaly Scores                                                        28.67
Attack Type                                                         Malware
