In [15]:
import openai
import pandas as pd
import os
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Set your OpenAI API key here (read from the environment so it is never hardcoded)
openai.api_key = os.getenv("OPENAI_API_KEY")

# Load your CSV Q&A bank
qa_data = pd.read_csv("Cybersecurity_QA_500 .csv")  # keep the space exactly as your file name

# Combine questions and answers into searchable documents.
# An empty CSV cell is read as NaN, and NaN + str stays NaN; TfidfVectorizer
# refuses NaN documents. Blank the gaps only while building 'combined', so the
# original 'Question' / 'Answer' columns are left untouched.
qa_data['combined'] = (
    qa_data['Question'].fillna("").astype(str)
    + " "
    + qa_data['Answer'].fillna("").astype(str)
)

# Prepare TF-IDF vectorizer to search your local data.
# fit_transform learns the vocabulary and produces the document matrix in one pass.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(qa_data['combined'])

# Function to search your CSV first
def search_local_qa(user_question, threshold=0.5):
    """Look up the closest entry in the local Q&A bank.

    The question is vectorized with the module-level ``vectorizer`` and
    compared by cosine similarity against every row of ``tfidf_matrix``.

    Args:
        user_question: The text to search for.
        threshold: Minimum similarity score the best row must reach.

    Returns:
        The best row's 'Answer' prefixed with "(From your knowledge base)",
        or None when the best score does not reach ``threshold``.
    """
    query_vec = vectorizer.transform([user_question])
    scores = cosine_similarity(query_vec, tfidf_matrix)

    # A single query gives a one-row score matrix, so the flattened argmax
    # is also the column (row of qa_data) with the highest score.
    best_idx = scores.argmax()
    best_score = scores[0, best_idx]

    # Guard clause: bail out when nothing is similar enough.
    if not best_score >= threshold:
        return None

    answer = qa_data.iloc[best_idx]['Answer']
    return f"(From your knowledge base)\n{answer}"

# Function to ask OpenAI if no good local match found
def ask_openai(user_question):
    """Send the question to OpenAI's chat model and return the reply text.

    Works with both generations of the ``openai`` package: releases from 1.0
    on removed ``openai.ChatCompletion`` in favour of a client object, so the
    client is used when the package provides it and the legacy call otherwise.

    Args:
        user_question: The user's question, sent as the "user" message.

    Returns:
        The content of the first choice in the model's response.
    """
    messages = [
        {"role": "system", "content": "You are a helpful cybersecurity assistant. You give simple, clear advice on how people can stay safe online."},
        {"role": "user", "content": user_question},
    ]

    if hasattr(openai, "OpenAI"):
        # openai>=1.0: reuse the key configured on the module; when that is
        # None the client falls back to the OPENAI_API_KEY environment variable.
        client = openai.OpenAI(api_key=openai.api_key)
        response = client.chat.completions.create(model="gpt-4o", messages=messages)
    else:
        # openai<1.0: legacy module-level interface.
        response = openai.ChatCompletion.create(model="gpt-4o", messages=messages)

    return response.choices[0].message.content

# Main chatbot function
def chatbot(user_question):
    """Answer a question, preferring the local Q&A bank over OpenAI.

    Args:
        user_question: The question text entered by the user.

    Returns:
        The local knowledge-base answer when ``search_local_qa`` finds one,
        otherwise whatever ``ask_openai`` returns (a notice is printed first).
    """
    kb_reply = search_local_qa(user_question)
    if not kb_reply:
        # Nothing usable locally: tell the user, then fall back to the model.
        print("(No match found in local data, asking OpenAI...)")
        return ask_openai(user_question)
    return kb_reply

# Run the chatbot
if __name__ == "__main__":
    # Simple read-answer-print loop; runs until the user types 'quit'.
    while True:
        try:
            user_input = input("Ask your cybersecurity question (type 'quit' to exit): ")
        except (EOFError, KeyboardInterrupt):
            # Input stream closed or the prompt was interrupted: exit quietly
            # instead of surfacing a traceback.
            break

        # Strip so that " quit " still exits and stray whitespace is ignored.
        question = user_input.strip()
        if question.lower() == "quit":
            break
        if not question:
            # Nothing was asked; do not spend an OpenAI request on a blank line.
            continue

        answer = chatbot(question)
        print(answer)


Ask your cybersecurity question (type 'quit' to exit):  How do i secure my email account


(From your knowledge base)
Use a strong password, enable 2FA, and be cautious of suspicious emails.


Ask your cybersecurity question (type 'quit' to exit):  quit


In [17]:
import streamlit as st
import openai
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import os

# Set your OpenAI API key
openai.api_key = os.getenv("OPENAI_API_KEY")  # make sure to set this environment variable in deployment

# Load the CSV file (make sure this file is present in the same folder as the app)
qa_data = pd.read_csv("Cybersecurity_QA_500 .csv")  # note the space before .csv (keep it exactly as your file name)

# An empty CSV cell is read as NaN, and NaN + str stays NaN; TfidfVectorizer
# refuses NaN documents. Blank the gaps only while building 'combined', so the
# original 'Question' / 'Answer' columns are left untouched.
qa_data['combined'] = (
    qa_data['Question'].fillna("").astype(str)
    + " "
    + qa_data['Answer'].fillna("").astype(str)
)

# Prepare TF-IDF vectorizer to search local data.
# fit_transform learns the vocabulary and produces the document matrix in one pass.
# NOTE(review): Streamlit re-executes the script on each interaction, so this
# load/fit is presumably repeated every time; consider caching it — confirm.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(qa_data['combined'])

def search_local_qa(user_question, threshold=0.5):
    """Look up the closest entry in the local Q&A bank.

    The question is vectorized with the module-level ``vectorizer`` and
    compared by cosine similarity against every row of ``tfidf_matrix``.

    Args:
        user_question: The text to search for.
        threshold: Minimum similarity score the best row must reach.

    Returns:
        The best row's 'Answer' prefixed with "(From your knowledge base)",
        or None when the best score does not reach ``threshold``.
    """
    query_vec = vectorizer.transform([user_question])
    scores = cosine_similarity(query_vec, tfidf_matrix)

    # A single query gives a one-row score matrix, so the flattened argmax
    # is also the column (row of qa_data) with the highest score.
    best_idx = scores.argmax()
    best_score = scores[0, best_idx]

    # Guard clause: bail out when nothing is similar enough.
    if not best_score >= threshold:
        return None

    answer = qa_data.iloc[best_idx]['Answer']
    return f"(From your knowledge base)\n{answer}"

def ask_openai(user_question):
    """Send the question to OpenAI's chat model and return the reply text.

    Works with both generations of the ``openai`` package: releases from 1.0
    on removed ``openai.ChatCompletion`` in favour of a client object, so the
    client is used when the package provides it and the legacy call otherwise.

    Args:
        user_question: The user's question, sent as the "user" message.

    Returns:
        The content of the first choice in the model's response.
    """
    messages = [
        {"role": "system", "content": "You are a helpful cybersecurity assistant. You give simple, clear advice on how people can stay safe online."},
        {"role": "user", "content": user_question},
    ]

    if hasattr(openai, "OpenAI"):
        # openai>=1.0: reuse the key configured on the module; when that is
        # None the client falls back to the OPENAI_API_KEY environment variable.
        client = openai.OpenAI(api_key=openai.api_key)
        response = client.chat.completions.create(model="gpt-4o", messages=messages)
    else:
        # openai<1.0: legacy module-level interface.
        response = openai.ChatCompletion.create(model="gpt-4o", messages=messages)

    return response.choices[0].message.content

# Streamlit interface
st.title("Cybersecurity Chatbot")
st.write("Ask me anything about staying safe online!")

user_input = st.text_input("Your cybersecurity question:")

# A whitespace-only submission is truthy but carries no question; strip it so
# it is treated like an empty box rather than being forwarded to OpenAI.
question = user_input.strip() if user_input else ""

if question:
    with st.spinner('Thinking...'):
        # Try the local knowledge base first; only call OpenAI when it has no match.
        local_answer = search_local_qa(question)
        if local_answer:
            st.success(local_answer)
        else:
            st.warning("No match in knowledge base. Asking AI...")
            ai_answer = ask_openai(question)
            st.success(ai_answer)



2025-06-18 13:23:55.278 
  command:

    streamlit run C:\Users\Hp\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-06-18 13:23:55.278 Session state does not function when running a script without `streamlit run`
