In [1]:
import io
import json
import os
import tkinter as tk
from tkinter import filedialog
from typing import List

import faiss
import google.generativeai as genai
import numpy as np
import PyPDF2
from fastapi import FastAPI, UploadFile, File
from PIL import Image
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Application object that the @app.post route handlers in this notebook register on.
app = FastAPI()

In [3]:
def load_json(filename):
    """Read the UTF-8 encoded JSON file at ``filename`` and return the parsed value.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        Whatever Python object the JSON document decodes to.
    """
    with open(filename, 'r', encoding='utf-8') as handle:
        raw_json = handle.read()
    return json.loads(raw_json)

In [4]:
def extract_text(data):
    """Flatten each post into a single ``"<title> <post_text>"`` string.

    Args:
        data: Iterable of dict-like posts. The ``'title'`` and ``'post_text'``
            keys are read; a missing key contributes an empty string.

    Returns:
        A list with one combined string per post, in input order.
    """
    return [
        f"{post.get('title', '')} {post.get('post_text', '')}"
        for post in data
    ]

In [5]:
# Load the posts from reddit_posts.json and flatten each one to a single
# "title post_text" string. `documents` is the corpus that is embedded below
# and later looked up by position in retrieve_relevant_text().
json_file = "reddit_posts.json"
data = load_json(json_file)
documents = extract_text(data)

In [6]:
# Encode the whole corpus once. retrieve_relevant_text() maps FAISS result ids
# straight back into `documents`, so it relies on the embeddings keeping the
# same order as `documents`.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedding_model.encode(documents)

In [7]:
# Flat L2 index sized to the embedding width (second dimension of `embeddings`).
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(np.array(embeddings))
# Persist the index to disk; load_faiss_index() reads this same file back.
faiss.write_index(index, "faiss_index.bin")

print("FAISS index built successfully!")

FAISS index built successfully!


In [8]:
def load_faiss_index(path="faiss_index.bin"):
    """Read a FAISS index from disk.

    Args:
        path: Location of the serialized index. Defaults to
            ``"faiss_index.bin"``, the file this notebook writes after building
            the index, so existing zero-argument calls behave as before.

    Returns:
        The index object returned by ``faiss.read_index``.
    """
    return faiss.read_index(path)

# Rebind `index` to the copy read back from disk; retrieve_relevant_text() searches this object.
index = load_faiss_index()

In [9]:

# Read the Gemini API key from the environment instead of embedding it in the
# notebook, where it would be exposed to anyone who can read the file.
# A missing GOOGLE_API_KEY raises KeyError here, before any request is made.
# NOTE(review): the key that was previously hardcoded in this cell should be
# treated as compromised and revoked.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

In [10]:
def retrieve_relevant_text(query, k=5):
    """Return the stored documents most similar to ``query``, best match first.

    Args:
        query: Free-text query to embed and search for.
        k: Maximum number of nearest neighbours to request from the index.

    Returns:
        A list of at most ``k`` distinct document strings, ordered as FAISS
        ranked them (closest first).
    """
    # FAISS only accepts float32 matrices; make that explicit rather than
    # relying on the encoder's output dtype.
    query_embedding = np.asarray(embedding_model.encode([query]), dtype="float32")
    _, indices = index.search(query_embedding, k)

    # FAISS pads the id row with -1 when fewer than k vectors are available;
    # used as a list index that would silently return the *last* document.
    hits = (documents[i] for i in indices[0] if 0 <= i < len(documents))

    # dict.fromkeys removes duplicate texts while keeping the ranking order
    # (a set would discard the order and vary between runs).
    return list(dict.fromkeys(hits))

In [11]:
def generate_response(query, context=None):
    """Ask the Gemini model to answer ``query`` with ``context`` in the prompt.

    Args:
        query: The user's question.
        context: Text placed in the prompt's "Context" slot. When ``None``, the
            results of ``retrieve_relevant_text(query)`` are joined with
            newlines and used instead.

    Returns:
        The model's reply with surrounding whitespace stripped, or a string of
        the form ``"Error: <message>"`` if producing the reply raised.
    """
    gemini = genai.GenerativeModel("gemini-2.0-flash")

    if context is None:
        retrieved = retrieve_relevant_text(query)
        context = "\n".join(retrieved)

    print(f"Processing query: {query}")  # Debug log to track query history

    prompt = f"""
    You are an expert in cybersecurity trained on relevant cybersecurity discussions. Answer concisely, avoid repetition, and provide accurate information.

    Context: {context}
    User Query: {query}
    
    If the query is unrelated to cybersecurity, reply: "I can only assist with cybersecurity-related questions."
    Keep responses unique and under 200 words.
    """

    # Both the request and reading `.text` stay inside the try block so any
    # failure is reported as an "Error: ..." string rather than raised.
    try:
        reply = gemini.generate_content(prompt)
        return reply.text.strip()
    except Exception as exc:
        return "Error: " + str(exc)

In [12]:
def extract_text_from_pdf(pdf_file):
    """Concatenate the extractable text of every page in a PDF.

    Args:
        pdf_file: Anything ``PyPDF2.PdfReader`` accepts (the callers in this
            notebook pass binary file objects).

    Returns:
        The page texts joined with no separator. Pages whose extraction yields
        an empty or ``None`` result are skipped.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    # Extract each page exactly once; the generator feeds the filter below so
    # the extraction is not repeated just to test for emptiness.
    page_texts = (page.extract_text() for page in reader.pages)
    return "".join(text for text in page_texts if text)

In [13]:
@app.post("/query")
def query_text(prompt: str):
    """Answer a text prompt using documents retrieved for that prompt.

    Args:
        prompt: The user's question.

    Returns:
        ``{"response": <text returned by generate_response>}``.
    """
    context = "\n".join(retrieve_relevant_text(prompt))
    return {"response": generate_response(prompt, context)}


In [14]:
@app.post("/query_with_image")
def query_with_image(prompt: str, file: UploadFile = File(...)):
    """Send a prompt together with an uploaded image to the Gemini model.

    Args:
        prompt: Instruction or question to accompany the image.
        file: Uploaded image; its bytes are read fully and opened with PIL.

    Returns:
        ``{"response": <model reply text>}``. Unlike ``generate_response``,
        exceptions are not caught here and the reply is not stripped.
    """
    uploaded_bytes = file.file.read()
    picture = Image.open(io.BytesIO(uploaded_bytes))
    gemini = genai.GenerativeModel("gemini-2.0-flash")
    reply = gemini.generate_content([prompt, picture])
    return {"response": reply.text}

In [15]:
@app.post("/query_with_pdf")
def query_with_pdf(prompt: str, file: UploadFile = File(...)):
    """Answer a prompt about an uploaded PDF.

    The PDF's extracted text is placed in the model's context, followed by the
    stored documents retrieved for that text. Previously the PDF text was only
    used as a retrieval query, so the model never saw the document it was
    being asked about (the console chatbot already passes the PDF text as
    context).

    Args:
        prompt: The user's question about the document.
        file: Uploaded PDF.

    Returns:
        ``{"response": <text returned by generate_response>}``.
    """
    pdf_text = extract_text_from_pdf(file.file)
    if not pdf_text:
        # No extractable text: fall back to generate_response's default
        # behaviour of retrieving context for the prompt itself.
        return {"response": generate_response(prompt)}

    related_texts = retrieve_relevant_text(pdf_text)
    context = "\n".join([pdf_text, *related_texts])
    return {"response": generate_response(prompt, context)}

In [16]:
def open_file_dialog(file_types):
    """Show a native "open file" dialog and return the chosen path.

    Args:
        file_types: ``filetypes`` value passed through to
            ``filedialog.askopenfilename`` (a list of ``(label, pattern)``
            pairs at the call sites in this notebook).

    Returns:
        Whatever ``askopenfilename`` returns; callers treat a falsy value as
        "nothing selected".
    """
    root = tk.Tk()
    try:
        root.withdraw()  # hide the root window; only the dialog should appear
        root.lift()
        root.attributes("-topmost", True)  # ask for the dialog to open above other windows
        return filedialog.askopenfilename(filetypes=file_types)
    finally:
        # Always tear down the hidden root, even if the dialog raises, so a
        # failed call does not leave a Tk instance behind.
        root.destroy()

In [None]:
def chatbot():
    """Run the interactive console chatbot until the user types ``exit``.

    Commands (case-insensitive, surrounding whitespace ignored):
        exit     -- leave the loop.
        refresh  -- regenerate the answer to the previous question, reusing
                    the same context (e.g. the PDF text) it was asked with.
        image    -- pick an image file and ask the model to analyze it.
        pdf      -- pick a PDF, then ask a question about its text.
    Any other non-empty input is answered via ``generate_response``.
    """
    print("Cybersecurity AI Chatbot. Type 'exit' to quit.")
    last_query = ""
    last_context = None  # context used with last_query; None = retrieve from the index
    last_response = ""

    while True:
        user_input = input("Enter query ('image'/'pdf' to upload, 'refresh' to regenerate): ")
        query = user_input.strip()
        command = query.lower()

        if command == 'exit':
            break
        elif not command:
            # Nothing typed: prompt again rather than sending an empty query.
            continue
        elif command == 'refresh':
            if last_query:
                print("Regenerating response...")
                response = generate_response(last_query, context=last_context)
                if response == last_response:
                    print("Generated response is identical, modifying output...")
                    response = generate_response(
                        last_query + " Provide a different perspective.",  # Ensures a new response
                        context=last_context,
                    )
                print("Response:", response)
                # Remember whichever answer was shown so the next refresh
                # compares against it.
                last_response = response
            else:
                print("No previous query found. Enter a new one first.")
        elif command == 'image':
            # A tuple of patterns is the portable Tk form; a semicolon-joined
            # string is not a valid pattern list on every platform.
            image_path = open_file_dialog(
                [("Image Files", ("*.png", "*.jpg", "*.jpeg", "*.bmp", "*.gif"))]
            )
            if image_path:
                try:
                    with open(image_path, "rb") as img_file:
                        image = Image.open(img_file)
                        model = genai.GenerativeModel("gemini-2.0-flash")
                        print("Processing image...")
                        response = model.generate_content(["Analyze this image", image])
                        print("Response:", response.text.strip())
                except Exception as e:
                    # Report the failure and keep the chat session alive.
                    print(f"Error: {e}")
        elif command == 'pdf':
            pdf_path = open_file_dialog([("PDF Files", "*.pdf")])
            if pdf_path:
                try:
                    with open(pdf_path, "rb") as pdf_file:
                        text = extract_text_from_pdf(pdf_file)
                except Exception as e:
                    print(f"Error: {e}")
                    continue
                if not text:
                    print("No extractable text found in the PDF.")
                    continue
                # Ask for the actual question; previously the literal command
                # word "pdf" was sent to the model as the query.
                question = input("Enter your question about the PDF (blank for a summary): ").strip()
                if not question:
                    question = "Summarize the key cybersecurity points of this document."
                print("Processing PDF...")
                response = generate_response(question, context=text)
                print("Response:", response)
                last_query, last_context, last_response = question, text, response
        else:
            print("Processing query...")
            response = generate_response(query)
            print("Response:", response)
            last_query, last_context, last_response = query, None, response

# Start the interactive console chatbot when this code runs as the main module.
# NOTE(review): the message says the API is running, but nothing visible in this
# file starts a server for `app` — confirm how the FastAPI routes are served.
if __name__ == "__main__":
    print("API and Chatbot are running!")
    chatbot()


API and Chatbot are running!
Cybersecurity AI Chatbot. Type 'exit' to quit.
Processing query...
Processing query: what is cybersecurity
Response: Cybersecurity is the practice of protecting computer systems, networks, and digital information from unauthorized access, damage, theft, or disruption. It involves implementing technologies, processes, and controls to ensure confidentiality, integrity, and availability of data. Key aspects include:

*   **Risk Management:** Identifying and mitigating potential threats and vulnerabilities.
*   **Network Security:** Securing networks through firewalls, intrusion detection systems, and access controls.
*   **Endpoint Security:** Protecting individual devices like computers and mobile phones with antivirus software and endpoint detection and response (EDR) solutions.
*   **Data Security:** Implementing encryption and data loss prevention (DLP) measures to safeguard sensitive information.
*   **Incident Response:** Developing plans to effectively 