##### Copyright 2025 Google LLC.

In [1]:
# @title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [2]:
!pip install -q google-generativeai faiss-cpu

In [3]:
import google.generativeai as genai
import os
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import faiss
import numpy as np

In [4]:
# Set your API key (Paste your key securely or use Kaggle secrets)
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("GOOGLE_API_KEY")

# Configure Gemini with the secure key
genai.configure(api_key=secret_value_0)

In [5]:
df = pd.read_csv("/kaggle/input/cybersecurity-attacks-csv/cybersecurity_attacks.csv")  # Adjust path as needed
df.head()

Unnamed: 0,Timestamp,Source IP Address,Destination IP Address,Source Port,Destination Port,Protocol,Packet Length,Packet Type,Traffic Type,Payload Data,...,Action Taken,Severity Level,User Information,Device Information,Network Segment,Geo-location Data,Proxy Information,Firewall Logs,IDS/IPS Alerts,Log Source
0,2023-05-30 06:33:58,103.216.15.12,84.9.164.252,31225,17616,ICMP,503,Data,HTTP,Qui natus odio asperiores nam. Optio nobis ius...,...,Logged,Low,Reyansh Dugal,Mozilla/5.0 (compatible; MSIE 8.0; Windows NT ...,Segment A,"Jamshedpur, Sikkim",150.9.97.135,Log Data,,Server
1,2020-08-26 07:08:30,78.199.217.198,66.191.137.154,17245,48166,ICMP,1174,Data,HTTP,Aperiam quos modi officiis veritatis rem. Omni...,...,Blocked,Low,Sumer Rana,Mozilla/5.0 (compatible; MSIE 8.0; Windows NT ...,Segment B,"Bilaspur, Nagaland",,Log Data,,Firewall
2,2022-11-13 08:23:25,63.79.210.48,198.219.82.17,16811,53600,UDP,306,Control,HTTP,Perferendis sapiente vitae soluta. Hic delectu...,...,Ignored,Low,Himmat Karpe,Mozilla/5.0 (compatible; MSIE 9.0; Windows NT ...,Segment C,"Bokaro, Rajasthan",114.133.48.179,Log Data,Alert Data,Firewall
3,2023-07-02 10:38:46,163.42.196.10,101.228.192.255,20018,32534,UDP,385,Data,HTTP,Totam maxime beatae expedita explicabo porro l...,...,Blocked,Medium,Fateh Kibe,Mozilla/5.0 (Macintosh; PPC Mac OS X 10_11_5; ...,Segment B,"Jaunpur, Rajasthan",,,Alert Data,Firewall
4,2023-07-16 13:11:07,71.166.185.76,189.243.174.238,6131,26646,TCP,1462,Data,DNS,Odit nesciunt dolorem nisi iste iusto. Animi v...,...,Blocked,Low,Dhanush Chad,Mozilla/5.0 (compatible; MSIE 5.0; Windows NT ...,Segment C,"Anantapur, Tripura",149.6.110.119,,Alert Data,Firewall


In [6]:
model = genai.GenerativeModel("gemini-pro")

few_shot_template = """
You are a cybersecurity SOC analyst. Given a security log, perform:

1. A plain-English summary
2. MITRE ATT&CK Technique (if applicable)
3. Recommended response actions

Examples:

Log: User jsmith ran powershell -enc JAB...
Summary: User jsmith executed an obfuscated PowerShell command.
MITRE: T1059.001 (PowerShell)
Actions: Investigate the full command, isolate host, scan for persistence.

Log: {log}
Summary:"""


In [7]:
def analyze_log_with_gemini(log_text):
    prompt = few_shot_template.format(log=log_text)
    response = model.generate_content(prompt)
    return response.text


In [8]:
# Step 6: Vector Search with FAISS for RAG
# Convert logs to plain text for vectorization
doc_texts = df.astype(str).apply(lambda row: "\n".join(row), axis=1).tolist()

# TF-IDF + FAISS
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(doc_texts).toarray().astype('float32')

index = faiss.IndexFlatL2(X.shape[1])
index.add(X)

def search_similar_logs(query, top_k=2):
    q_vec = vectorizer.transform([query]).toarray().astype('float32')
    _, idxs = index.search(q_vec, top_k)
    return [doc_texts[i] for i in idxs[0]]

In [9]:
# Step 7: Agent-Style Reasoning with Context + Gemini

def agent_response_with_context(log_text):
    context_logs = search_similar_logs(log_text)
    context = "\n\n---\n\n".join(context_logs)
    
    prompt = f"""
As a SOC AI Assistant, analyze the following log entry in the context of previous similar logs.

Log Entry:
{log_text}

Relevant Historical Context:
{context}

Give your reasoning, possible threat type, and response actions.
"""
    response = model.generate_content(prompt)
    return response.text

In [10]:
# Step 8: Run Analysis on a Sample Entry
sample_log = df.iloc[0].to_string()

summary = analyze_log_with_gemini(sample_log)
reasoning = agent_response_with_context(sample_log)

print("📝 Structured Summary:\n", summary)
print("\n🧠 Agent Reasoning:\n", reasoning)

NotFound: 404 models/gemini-pro is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.