In [2]:
# In this api call is being made to chat gpt to check if its a follow up question or not

import json
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import ipywidgets as widgets
from ipywidgets import HTML, Layout
from IPython.display import display
from openai import OpenAI
import openai

class Chatbot:
    def __init__(self, file_paths, api_key):
        self.file_paths = file_paths
        openai.api_key = api_key
        self.knowledge_base = self.load_data()
        self.documents = self.prepare_documents()
        self.vectorizer = TfidfVectorizer()
        self.tfidf_matrix = self.vectorizer.fit_transform(self.documents)
        self.full_documents = self.knowledge_base
        self.conversation_history = []
        self.setup_interface()
        client = OpenAI()

    def load_data(self):
        all_data = []
        for file_path in self.file_paths:
            with open(file_path, 'r', encoding='utf-8') as file:
                data = json.load(file)
                all_data.extend(data)
        return all_data

    def concatenate_text(self, json_obj):
        fields = ['CVE_ID', 'Assigner', 'Description']
        concatenated_text = ' '.join(str(json_obj[field]) for field in fields if field in json_obj)
        return concatenated_text

    def prepare_documents(self):
        return [self.concatenate_text(item) for item in self.knowledge_base]

    def answer_question(self, query):
        query_vec = self.vectorizer.transform([query])
        similarities = cosine_similarity(query_vec, self.tfidf_matrix)[0]
        top_indices = np.argsort(similarities)[-10:][::-1]
        top_documents = [self.full_documents[i] for i in top_indices]
        return top_documents

    def should_use_previous_context(self, question):
        client = OpenAI()
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "Determine if the following question is a follow-up question."},
                {"role": "user", "content": question}
            ]
        )
        response = completion.choices[0].message.content.strip().lower()

        return 'yes' in response

    def handle_question(self, _=None):
        question = self.text_input.value
        self.conversation_history.append({"role": "user", "content": question})
        client = OpenAI()
        

        if self.should_use_previous_context(question):
            messages = [
                {"role": "system", "content": "You are an assistant that helps with CVE data. Respond with relevant CVE details."}
            ] + self.conversation_history
        else:
            answers = self.answer_question(question)
            json_string = json.dumps(answers, indent=4)
            messages = [
                {"role": "system", "content": "You are an assistant that helps with CVE data. Respond with relevant CVE details."}
            ] + self.conversation_history + [
                {"role": "assistant", "content": json_string}
            ]

        with self.results_output:
            display(widgets.HTML(f'<b><font color="Blue">Question: </font></b><b><font color="Black">{question}</font></b>'))
            for i in messages:
                print(i)
            try:
                completion = client.chat.completions.create(
                    model="gpt-3.5-turbo",
                    messages=messages
                )
                response = completion.choices[0].message.content
                self.conversation_history.append({"role": "assistant", "content": response})

                pretty_response = f"<pre>{response}</pre>"
                styled_response = f"""
                    <style>
                        pre {{
                            background-color: #f0f0f0;
                            border: 1px solid #ccc;
                            border-radius: 5px;
                            padding: 10px;
                            overflow: auto;
                            font-family: Consolas, monospace;
                            font-size: 13px;
                            line-height: 1.4;
                            word-wrap: break-word;
                            width: 1100px;
                        }}
                    </style>
                    <p>{pretty_response}</p>
                """

                html_widget = HTML(
                    value=styled_response,
                    layout=Layout(width='auto')
                )
                display(widgets.HTML(f'<b><font color="green">Answer:</font></b> '))
                display(html_widget)

            except openai.error.RateLimitError:
                print("API rate limit exceeded. Please try again later.")
            except openai.error.OpenAIError as e:
                print(f"An API error occurred: {e}")
            except openai.error.AuthenticationError:
                print("Authentication failed. Check your API key.")
            except Exception as e:
                print(f"An unexpected error occurred: {e}")

    def on_text_input_submit(self, change):
        self.handle_question(change)

    def setup_interface(self):
        self.text_input = widgets.Text(
            placeholder='Enter your question here',
            description='Question:',
            layout=widgets.Layout(width='300px', height='100px')
        )
        self.text_input.style = {'description_width': 'initial'}
        self.text_input.on_submit(self.on_text_input_submit)
        self.results_output = widgets.Output()
        display(self.text_input, self.results_output)

# Initialize the chatbot with multiple files
file_paths = [
    'nvdcve-1.1-recent_updated',
    'nvdcve-1.1-modified_updated',
    'nvdcve-1.1-2024_updated.json',
 'nvdcve-1.1-2023_updated.json',
 'nvdcve-1.1-2022_updated.json',
 'nvdcve-1.1-2021_updated.json',
 'nvdcve-1.1-2020_updated.json',
 'nvdcve-1.1-2019_updated.json',
 'nvdcve-1.1-2018_updated.json',
 'nvdcve-1.1-2017_updated.json',
 'nvdcve-1.1-2016_updated.json',
 'nvdcve-1.1-2015_updated.json',
 'nvdcve-1.1-2014_updated.json',
 'nvdcve-1.1-2013_updated.json',
 'nvdcve-1.1-2012_updated.json',
 'nvdcve-1.1-2011_updated.json',
 'nvdcve-1.1-2010_updated.json',
 'nvdcve-1.1-2009_updated.json',
 'nvdcve-1.1-2008_updated.json',
 'nvdcve-1.1-2007_updated.json',
 'nvdcve-1.1-2006_updated.json',
 'nvdcve-1.1-2005_updated.json',
 'nvdcve-1.1-2004_updated.json',
 'nvdcve-1.1-2003_updated.json',
 'nvdcve-1.1-2002_updated.json'
 
 ]
api_key = 'your-api-key'
chatbot = Chatbot(file_paths, api_key)


  self.text_input.on_submit(self.on_text_input_submit)


Text(value='', description='Question:', layout=Layout(height='100px', width='300px'), placeholder='Enter your …

Output()

In [1]:
import json
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import ipywidgets as widgets
from ipywidgets import HTML, Layout
from IPython.display import display
from openai import OpenAI
import openai

class Chatbot:
    def __init__(self, file_paths, api_key):
        self.file_paths = file_paths
        openai.api_key = api_key
        self.knowledge_base = self.load_data()
        self.documents = self.prepare_documents()
        self.vectorizer = TfidfVectorizer()
        self.tfidf_matrix = self.vectorizer.fit_transform(self.documents)
        self.full_documents = self.knowledge_base
        self.cve_index = self.create_cve_index()
        self.conversation_history = []
        self.setup_interface()

    def load_data(self):
        all_data = []
        for file_path in self.file_paths:
            with open(file_path, 'r', encoding='utf-8') as file:
                data = json.load(file)
                all_data.extend(data)
        return all_data

    def concatenate_text(self, json_obj):
        fields = ['CVE_ID', 'Assigner', 'Description']
        concatenated_text = ' '.join(str(json_obj[field]) for field in fields if field in json_obj)
        return concatenated_text

    def prepare_documents(self):
        return [self.concatenate_text(item) for item in self.knowledge_base]

    def create_cve_index(self):
        cve_index = {}
        for idx, item in enumerate(self.knowledge_base):
            cve_id = item.get('CVE_ID')
            if cve_id:
                cve_index[cve_id] = idx
        return cve_index

    # def answer_question(self, query):
    #     if query in self.cve_index:
    #         index = self.cve_index[query]
    #         return [self.full_documents[index]]
        
    #     query_vec = self.vectorizer.transform([query])
    #     similarities = cosine_similarity(query_vec, self.tfidf_matrix)[0]
    #     top_indices = np.argsort(similarities)[-5:][::-1]
    #     top_documents = [self.full_documents[i] for i in top_indices]
    #     return top_documents

    def answer_question(self, query):
        query_words = query.split()
        found_documents = []

        # Check if any word in the query is a CVE_ID
        for word in query_words:
            if word in self.cve_index:
                index = self.cve_index[word]
                found_documents.append(self.full_documents[index])

        # If any CVE_ID is found, return the corresponding documents
        if found_documents:
            return found_documents

        # If no CVE_ID is found, proceed with TF-IDF similarity search
        similarity_scores = np.zeros(len(self.documents))

        for word in query_words:
            query_vec = self.vectorizer.transform([word])
            similarities = cosine_similarity(query_vec, self.tfidf_matrix)[0]
            similarity_scores += similarities

        top_indices = np.argsort(similarity_scores)[-10:][::-1]
        top_documents = [self.full_documents[i] for i in top_indices]
        return top_documents


    def should_use_previous_context(self, question):
        client = OpenAI()
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "Determine if the following question is a follow-up question."},
                {"role": "user", "content": question}
            ]
        )
        response = completion.choices[0].message.content.strip().lower()
        return 'yes' in response

    def handle_question(self, _=None):

        question = self.text_input.value
        self.conversation_history.append({"role": "user", "content": question})
        client = OpenAI()
        if self.should_use_previous_context(question):
            messages = [
                {"role": "system", "content": "You are an assistant that helps with CVE data. Respond with relevant CVE details. Use previous context as well"}
            ] + self.conversation_history
        else:
            answers = self.answer_question(question)
            json_string = json.dumps(answers, indent=4)
            messages = [
                {"role": "system", "content": "You are an assistant that helps with CVE data. Respond with relevant CVE details."}
            ] + self.conversation_history + [
                {"role": "assistant", "content": json_string}
            ]

        with self.results_output:
            display(widgets.HTML(f'<b><font color="Blue">Question: </font></b><b><font color="Black">{question}</font></b>'))
            for i in messages:
                print(i)
            try:
                completion = client.chat.completions.create(
                    model="gpt-3.5-turbo",
                    messages=messages
                )
                response = completion.choices[0].message.content
                self.conversation_history.append({"role": "assistant", "content": response})

                pretty_response = f"<pre>{response}</pre>"
                styled_response = f"""
                    <style>
                        pre {{
                            background-color: #f0f0f0;
                            border: 1px solid #ccc;
                            border-radius: 5px;
                            padding: 10px;
                            overflow: auto;
                            font-family: Consolas, monospace;
                            font-size: 13px;
                            line-height: 1.4;
                            word-wrap: break-word;
                            width: 1100px;
                        }}
                    </style>
                    <p>{pretty_response}</p>
                """

                html_widget = HTML(
                    value=styled_response,
                    layout=Layout(width='auto')
                )
                display(widgets.HTML(f'<b><font color="green">Answer:</font></b> '))
                display(html_widget)

            except openai.error.RateLimitError:
                print("API rate limit exceeded. Please try again later.")
            except openai.error.OpenAIError as e:
                print(f"An API error occurred: {e}")
            except openai.error.AuthenticationError:
                print("Authentication failed. Check your API key.")
            except Exception as e:
                print(f"An unexpected error occurred: {e}")

    def on_text_input_submit(self, change):
        self.handle_question(change)

    def setup_interface(self):
        self.text_input = widgets.Text(
            placeholder='Enter your question here',
            description='Question:',
            layout=widgets.Layout(width='300px', height='100px')
        )
        self.text_input.style = {'description_width': 'initial'}
        self.text_input.on_submit(self.on_text_input_submit)
        self.results_output = widgets.Output()
        display(self.text_input, self.results_output)

# Initialize the chatbot with multiple files
file_paths = [
    'nvdcve-1.1-recent_updated',
    'nvdcve-1.1-modified_updated',
    'nvdcve-1.1-2024_updated.json',
 'nvdcve-1.1-2023_updated.json',
 'nvdcve-1.1-2022_updated.json',
 'nvdcve-1.1-2021_updated.json',
 'nvdcve-1.1-2020_updated.json',
 'nvdcve-1.1-2019_updated.json',
 'nvdcve-1.1-2018_updated.json',
 'nvdcve-1.1-2017_updated.json',
 'nvdcve-1.1-2016_updated.json',
 'nvdcve-1.1-2015_updated.json',
 'nvdcve-1.1-2014_updated.json',
 'nvdcve-1.1-2013_updated.json',
 'nvdcve-1.1-2012_updated.json',
 'nvdcve-1.1-2011_updated.json',
 'nvdcve-1.1-2010_updated.json',
 'nvdcve-1.1-2009_updated.json',
 'nvdcve-1.1-2008_updated.json',
 'nvdcve-1.1-2007_updated.json',
 'nvdcve-1.1-2006_updated.json',
 'nvdcve-1.1-2005_updated.json',
 'nvdcve-1.1-2004_updated.json',
 'nvdcve-1.1-2003_updated.json',
 'nvdcve-1.1-2002_updated.json'
 
 ]
# file_paths = [
    
#     'nvdcve-1.1-2024_updated.json',
#  'nvdcve-1.1-2023_updated.json',
#  ]
api_key = 'your-api-key'
chatbot = Chatbot(file_paths, api_key)


  self.text_input.on_submit(self.on_text_input_submit)


Text(value='', description='Question:', layout=Layout(height='100px', width='300px'), placeholder='Enter your …

Output()

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [None]:
# in this UI is changed in to a window This working latest on 19th Sep 2024
import json
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import openai
import tkinter as tk
from tkinter import scrolledtext
from openai import OpenAI


class Chatbot:
    def __init__(self, file_paths, api_key):
        self.file_paths = file_paths
        openai.api_key = api_key
        self.knowledge_base = self.load_data()
        print(f"Total number of vulnerabilities (documents) loaded: {len(self.knowledge_base)}")  # Print the total number of vulnerabilities

        self.documents = self.prepare_documents()
        self.vectorizer = TfidfVectorizer()
        self.tfidf_matrix = self.vectorizer.fit_transform(self.documents)
        self.full_documents = self.knowledge_base
        self.cve_index = self.create_cve_index()
        self.conversation_history = []
        self.setup_interface()

    def load_data(self):    
        all_data = []
        for file_path in self.file_paths:
            with open(file_path, 'r', encoding='utf-8') as file:
                data = json.load(file)
                all_data.extend(data)
        return all_data

    def concatenate_text(self, json_obj):
        fields = ['CVE_ID', 'Assigner', 'Description']
        concatenated_text = ' '.join(str(json_obj[field]) for field in fields if field in json_obj)
        return concatenated_text

    def prepare_documents(self):
        return [self.concatenate_text(item) for item in self.knowledge_base]

    def create_cve_index(self):
        cve_index = {}
        for idx, item in enumerate(self.knowledge_base):
            cve_id = item.get('CVE_ID')
            if cve_id:
                cve_index[cve_id] = idx
        return cve_index

    def answer_question(self, query):
        query_words = query.split()
        found_documents = []

        # Check if any word in the query is a CVE_ID
        for word in query_words:
            if word in self.cve_index:
                index = self.cve_index[word]
                found_documents.append(self.full_documents[index])

        # If any CVE_ID is found, return the corresponding documents
        if found_documents:
            return found_documents

        # If no CVE_ID is found, proceed with TF-IDF similarity search
        similarity_scores = np.zeros(len(self.documents))

        for word in query_words:
            query_vec = self.vectorizer.transform([word])
            similarities = cosine_similarity(query_vec, self.tfidf_matrix)[0]
            similarity_scores += similarities

        top_indices = np.argsort(similarity_scores)[-10:][::-1]
        top_documents = [self.full_documents[i] for i in top_indices]
        return top_documents

    def should_use_previous_context(self, question):
        client = OpenAI()
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "Determine if the following question is a follow-up question."},
                {"role": "user", "content": question}
            ]
        )
        response = completion.choices[0].message.content.strip().lower()
        return 'yes' in response

    def handle_question(self):
        question = self.text_input.get()
        self.conversation_history.append({"role": "user", "content": question})

        # if self.should_use_previous_context(question):
        #     messages = [
        #         {"role": "system", "content": "You are an assistant that helps with CVE data. Respond with relevant CVE details. Use previous context as well"}
        #     ] + self.conversation_history
        if(True):
            answers = self.answer_question(question)
            json_string = json.dumps(answers, indent=4)
            messages = [
                {"role": "system", "content": "You are an assistant that helps with CVE data. Respond with relevant CVE details. Only answer from the context,Recommend the this website and attach the CVE id in front of it https://nvd.nist.gov/vuln/detail/"}
            ]  + [
                {"role": "assistant", "content": json_string}
            ]+ self.conversation_history

        self.display_message(f'Question: {question}', 'blue')
        for message in messages:
            print(message)

        try:
            client = OpenAI()
            completion = client.chat.completions.create(
                #model="gpt-3.5-turbo",
                #model="gpt-4o",
                model="gpt-4o-mini",
                messages=messages
            )
            response = completion.choices[0].message.content
            self.conversation_history.append({"role": "assistant", "content": response})
            self.display_message(f'Answer:\n{response}', 'green')

        except openai.error.RateLimitError:
            self.display_message("API rate limit exceeded. Please try again later.", 'red')
        except openai.error.OpenAIError as e:
            self.display_message(f"An API error occurred: {e}", 'red')
        except openai.error.AuthenticationError:
            self.display_message("Authentication failed. Check your API key.", 'red')
        except Exception as e:
            self.display_message(f"An unexpected error occurred: {e}", 'red')

    def display_message(self, message, color):
        self.results_output.configure(state='normal')
        self.results_output.insert(tk.END, message + '\n', (color,))
        self.results_output.configure(state='disabled')
        self.results_output.see(tk.END)

    def setup_interface(self):
        root = tk.Tk()
        root.title("Chatbot Interface")

        self.text_input = tk.Entry(root, width=100)
        self.text_input.pack(pady=10)
        self.text_input.bind('<Return>', lambda event: self.handle_question())

        self.results_output = scrolledtext.ScrolledText(root, width=100, height=30, wrap=tk.WORD)
        self.results_output.pack(pady=10)
        self.results_output.tag_configure('blue', foreground='blue')
        self.results_output.tag_configure('green', foreground='green')
        self.results_output.tag_configure('red', foreground='red')
        self.results_output.configure(state='disabled')

        root.mainloop()

# Initialize the chatbot with multiple files
file_paths = [
    'nvdcve-1.1-recent_updated.json',
    'nvdcve-1.1-modified_updated.json',
    'nvdcve-1.1-2024_updated.json',
 'nvdcve-1.1-2023_updated.json',
 'nvdcve-1.1-2022_updated.json',
 'nvdcve-1.1-2021_updated.json',
 'nvdcve-1.1-2020_updated.json',
 'nvdcve-1.1-2019_updated.json',
 'nvdcve-1.1-2018_updated.json',
 'nvdcve-1.1-2017_updated.json',
 'nvdcve-1.1-2016_updated.json',
 'nvdcve-1.1-2015_updated.json',
 'nvdcve-1.1-2014_updated.json',
 'nvdcve-1.1-2013_updated.json',
 'nvdcve-1.1-2012_updated.json',
 'nvdcve-1.1-2011_updated.json',
 'nvdcve-1.1-2010_updated.json',
 'nvdcve-1.1-2009_updated.json',
 'nvdcve-1.1-2008_updated.json',
 'nvdcve-1.1-2007_updated.json',
 'nvdcve-1.1-2006_updated.json',
 'nvdcve-1.1-2005_updated.json',
 'nvdcve-1.1-2004_updated.json',
 'nvdcve-1.1-2003_updated.json',
 'nvdcve-1.1-2002_updated.json'
]
api_key = 'your-api-key'
chatbot = Chatbot(file_paths, api_key)


{'role': 'system', 'content': 'You are an assistant that helps with CVE data. Respond with relevant CVE details. Only answer from the context,Recommend the this website and attach the CVE id in front of it https://nvd.nist.gov/vuln/detail/'}
{'role': 'assistant', 'content': '[\n    {\n        "CVE_ID": "CVE-2022-33248",\n        "Assigner": "security.cna@qualcomm.com",\n        "Description": "Memory corruption in User Identity Module due to integer overflow to buffer overflow when a segement is received via qmi http.",\n        "References": [\n            {\n                "url": "https://www.qualcomm.com/company/product-security/bulletins/february-2023-bulletin"\n            }\n        ],\n        "PublishedDate": "2023-02-12T04:15Z",\n        "LastModifiedDate": "2024-04-12T17:16Z",\n        "Impact": {\n            "baseMetricV3": {\n                "cvssV3": {\n                    "version": "3.1",\n                    "vectorString": "CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:

In [3]:
# Evaluatoion of code latest working on 19th SEP 2024
# in this UI is changed in to a window 
# Here gpt output file is created which generates actual answer which will later be evaluated
import json
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import openai
import tkinter as tk
from tkinter import scrolledtext
from openai import OpenAI


class Chatbot:
    def __init__(self, file_paths, api_key, questions_file, output_file):
        self.file_paths = file_paths
        openai.api_key = api_key
        self.knowledge_base = self.load_data()
        self.documents = self.prepare_documents()
        self.vectorizer = TfidfVectorizer()
        self.tfidf_matrix = self.vectorizer.fit_transform(self.documents)
        self.full_documents = self.knowledge_base
        self.cve_index = self.create_cve_index()
        self.conversation_history = []
        self.questions_file = questions_file
        self.output_file = output_file
        self.setup_interface()



    def load_data(self):    
        all_data = []
        for file_path in self.file_paths:
            with open(file_path, 'r', encoding='utf-8') as file:
                data = json.load(file)
                all_data.extend(data)
        return all_data
    def load_questions(self):
        """Loads questions and expected answers from a JSON file."""
        with open(self.questions_file, 'r', encoding='utf-8') as file:
            return json.load(file)


    def concatenate_text(self, json_obj):
        fields = ['CVE_ID', 'Assigner', 'Description']
        concatenated_text = ' '.join(str(json_obj[field]) for field in fields if field in json_obj)
        return concatenated_text

    def prepare_documents(self):
        return [self.concatenate_text(item) for item in self.knowledge_base]

    def create_cve_index(self):
        cve_index = {}
        for idx, item in enumerate(self.knowledge_base):
            cve_id = item.get('CVE_ID')
            if cve_id:
                cve_index[cve_id] = idx
        return cve_index

    def answer_question(self, query):
        query_words = query.split()
        found_documents = []

        # Check if any word in the query is a CVE_ID
        for word in query_words:
            if word in self.cve_index:
                index = self.cve_index[word]
                found_documents.append(self.full_documents[index])

        # If any CVE_ID is found, return the corresponding documents
        if found_documents:
            return found_documents

        # If no CVE_ID is found, proceed with TF-IDF similarity search
        similarity_scores = np.zeros(len(self.documents))

        for word in query_words:
            query_vec = self.vectorizer.transform([word])
            similarities = cosine_similarity(query_vec, self.tfidf_matrix)[0]
            similarity_scores += similarities

        top_indices = np.argsort(similarity_scores)[-10:][::-1]
        top_documents = [self.full_documents[i] for i in top_indices]
        return top_documents

    def should_use_previous_context(self, question):
        client = OpenAI()
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "Determine if the following question is a follow-up question."},
                {"role": "user", "content": question}
            ]
        )
        response = completion.choices[0].message.content.strip().lower()
        return 'yes' in response

    def handle_question(self,question):
        # question = self.text_input.get()
        self.conversation_history.append({"role": "user", "content": question})

        # if self.should_use_previous_context(question):
        #     messages = [
        #         {"role": "system", "content": "You are an assistant that helps with CVE data. Respond with relevant CVE details. Use previous context as well"}
        #     ] + self.conversation_history
        if(True):
            answers = self.answer_question(question)
            json_string = json.dumps(answers, indent=4)
            # messages = [
            #     {"role": "system", "content": "You are an assistant that helps with CVE data. Respond with relevant CVE details. Only answer from the context \n if answer is not available in the context say data not available \n Recommend the this website and attach the CVE id in front of it https://nvd.nist.gov/vuln/detail/ \n"}
            # ]  + [
            #     {"role": "assistant", "content": json_string}
            # ]+ self.conversation_history # This has been commented to remove the conversation history
            messages = [
                {"role": "system", "content": "You are an assistant that helps with CVE data. Respond with relevant CVE details. Only answer from the context \n if answer is not available in the context say data not available \n Recommend the this website and attach the CVE id in front of it https://nvd.nist.gov/vuln/detail/ \n"}
            ]  + [
                {"role": "assistant", "content": json_string}
                
            ]+ [{"role": "user", "content": question}]


        self.display_message(f'Question: {question}', 'blue')
        for message in messages:
            print(message)

        try:
            client = OpenAI()
            completion = client.chat.completions.create(
                # model="gpt-3.5-turbo",
                #model="gpt-4o",
                model="gpt-4o-mini",
                messages=messages
            )
            response = completion.choices[0].message.content
            self.conversation_history.append({"role": "assistant", "content": response})
            self.display_message(f'Answer:\n{response}', 'green')
            return response

        except openai.error.RateLimitError:
            self.display_message("API rate limit exceeded. Please try again later.", 'red')
        except openai.error.OpenAIError as e:
            self.display_message(f"An API error occurred: {e}", 'red')
        except openai.error.AuthenticationError:
            self.display_message("Authentication failed. Check your API key.", 'red')
        except Exception as e:
            self.display_message(f"An unexpected error occurred: {e}", 'red')

    def display_message(self, message, color):
        self.results_output.configure(state='normal')
        self.results_output.insert(tk.END, message + '\n', (color,))
        self.results_output.configure(state='disabled')
        self.results_output.see(tk.END)

    
    def save_results(self, results):
        """Saves the results (questions, expected answers, actual answers) to a JSON file."""
        with open(self.output_file, 'w', encoding='utf-8') as file:
            json.dump(results, file, indent=4)
    
    def process_questions(self):
        """Processes questions from the loaded JSON and saves the results."""
        questions_data = self.load_questions()
        results = []

        for item in questions_data:
            question = item['question']
            expected_answer = item.get('expected_answer', 'No expected answer provided')
            # actual_answer = self.answer_question(question)
            actual_answer =  self.handle_question(question)

            result = {
                'question': question,
                'expected_answer': expected_answer,
                'actual_answer': actual_answer
            }

            results.append(result)

        self.save_results(results)
    
    def setup_interface(self):
        root = tk.Tk()
        root.title("ChatNVD Interface GPT")

        self.text_input = tk.Entry(root, width=100)
        self.text_input.pack(pady=10)
        self.text_input.bind('<Return>', lambda event: self.handle_question())

        self.results_output = scrolledtext.ScrolledText(root, width=100, height=30, wrap=tk.WORD)
        self.results_output.pack(pady=10)
        self.results_output.tag_configure('blue', foreground='blue')
        self.results_output.tag_configure('green', foreground='green')
        self.results_output.tag_configure('red', foreground='red')
        self.results_output.configure(state='disabled')

        # Button to process questions and save results
        process_button = tk.Button(root, text="Process Questions", command=self.process_questions)
        process_button.pack(pady=10)

        root.mainloop()

# Initialize the chatbot with multiple files
file_paths = [
    'nvdcve-1.1-recent_updated.json',
    'nvdcve-1.1-modified_updated.json',
    'nvdcve-1.1-2024_updated.json',
 'nvdcve-1.1-2023_updated.json',
 'nvdcve-1.1-2022_updated.json',
 'nvdcve-1.1-2021_updated.json',
 'nvdcve-1.1-2020_updated.json',
 'nvdcve-1.1-2019_updated.json',
 'nvdcve-1.1-2018_updated.json',
 'nvdcve-1.1-2017_updated.json',
 'nvdcve-1.1-2016_updated.json',
 'nvdcve-1.1-2015_updated.json',
 'nvdcve-1.1-2014_updated.json',
 'nvdcve-1.1-2013_updated.json',
 'nvdcve-1.1-2012_updated.json',
 'nvdcve-1.1-2011_updated.json',
 'nvdcve-1.1-2010_updated.json',
 'nvdcve-1.1-2009_updated.json',
 'nvdcve-1.1-2008_updated.json',
 'nvdcve-1.1-2007_updated.json',
 'nvdcve-1.1-2006_updated.json',
 'nvdcve-1.1-2005_updated.json',
 'nvdcve-1.1-2004_updated.json',
 'nvdcve-1.1-2003_updated.json',
 'nvdcve-1.1-2002_updated.json'
]
api_key = 'your-api-key'
questions_file = 'questions1.json'  # JSON file with questions and expected answers
output_file = 'gpt_output_results_6.json'  # JSON file to save results
api_key = 'your-api-key'
chatbot = Chatbot(file_paths, api_key, questions_file, output_file)


{'role': 'system', 'content': 'You are an assistant that helps with CVE data. Respond with relevant CVE details. Only answer from the context \n if answer is not available in the context say data not available \n Recommend the this website and attach the CVE id in front of it https://nvd.nist.gov/vuln/detail/ \n'}
{'role': 'assistant', 'content': '[\n    {\n        "CVE_ID": "CVE-2016-9733",\n        "Assigner": "psirt@us.ibm.com",\n        "Description": "IBM Team Concert (RTC) 4.0, 5.0 and 6.0 is vulnerable to cross-site scripting. This vulnerability allows users to embed arbitrary JavaScript code in the Web UI thus altering the intended functionality potentially leading to credentials disclosure within a trusted session. IBM X-Force ID: 119762.",\n        "References": [\n            {\n                "url": "https://exchange.xforce.ibmcloud.com/vulnerabilities/119762"\n            },\n            {\n                "url": "http://www.ibm.com/support/docview.wss?uid=swg22004611"\n 

: 

: 

In [14]:
import json
import random

def load_cve_data(cve_file):
    """Loads CVE data from a JSON file."""
    with open(cve_file, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data

def generate_question(cve):
    """Generates a question for a given CVE ID about its published date."""
    question = f"What is the published date of {cve['CVE_ID']}?"
    expected_answer = cve.get('PublishedDate', 'No published date available')
    
    return {
        "question": question,
        "expected_answer": expected_answer
    }

def generate_questions(cve_data, num_questions=5):
    """Generates random questions for the specified number of CVEs."""
    selected_cves = random.sample(cve_data, num_questions)
    questions = []
    
    for cve in selected_cves:
        questions.append(generate_question(cve))

    return questions

def save_questions_to_file(questions, output_file):
    """Saves the generated questions to a JSON file."""
    with open(output_file, 'w', encoding='utf-8') as file:
        json.dump(questions, file, indent=4)

# Load CVE data from a file
cve_file = 'nvdcve-1.1-2024_updated.json'  # Your CVE JSON file path
cve_data = load_cve_data(cve_file)

# Generate questions from random CVE IDs
questions = generate_questions(cve_data, num_questions=5)

# Save the generated questions to a JSON file
output_file = 'questions1.json'
save_questions_to_file(questions, output_file)

print(f"Questions have been generated and saved to {output_file}")


Questions have been generated and saved to questions1.json


In [15]:
# in this multiple questions are asked
import json
import random

def load_cve_data(cve_file):
    """Loads CVE data from a JSON file."""
    with open(cve_file, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data

def generate_question(cve, field):
    """Generates a question and expected answer for a given field of the CVE."""
    if field == "PublishedDate":
        question = f"What is the published date of {cve['CVE_ID']}?"
        expected_answer = cve.get('PublishedDate', 'No published date available')
    elif field == "Description":
        question = f"What is the description of {cve['CVE_ID']}?"
        expected_answer = cve.get('Description', 'No description available')
    
    return {
        "question": question,
        "expected_answer": expected_answer
    }

def generate_questions(cve_data, num_questions=5):
    """Generates random questions for the specified number of CVEs."""
    selected_cves = random.sample(cve_data, num_questions)
    questions = []
    
    for cve in selected_cves:
        # Generate a question for the published date
        questions.append(generate_question(cve, "PublishedDate"))
        # Generate a question for the description
        questions.append(generate_question(cve, "Description"))

    return questions

def save_questions_to_file(questions, output_file):
    """Saves the generated questions to a JSON file."""
    with open(output_file, 'w', encoding='utf-8') as file:
        json.dump(questions, file, indent=4)

# Load CVE data from a file
cve_file = 'nvdcve-1.1-2024_updated.json'  # Your CVE JSON file path
cve_data = load_cve_data(cve_file)

# Generate questions from random CVE IDs
questions = generate_questions(cve_data, num_questions=5)

# Save the generated questions to a JSON file
output_file = 'questions1.json'
save_questions_to_file(questions, output_file)

print(f"Questions have been generated and saved to {output_file}")


Questions have been generated and saved to questions1.json


In [None]:
import json
import random

def load_cve_data(cve_file):
    """Loads CVE data from a JSON file."""
    with open(cve_file, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data

def generate_question(cve, field):
    """Generates a question and expected answer for a given field of the CVE."""
    if field == "PublishedDate":
        question = f"What is the published date of {cve['CVE_ID']}?"
        expected_answer = cve.get('PublishedDate', 'No data available')
    elif field == "Description":
        question = f"What is the description of {cve['CVE_ID']}?"
        expected_answer = cve.get('Description', 'No data available')
    elif field == "ExploitabilityScore":
        question = f"What is the exploitability score of {cve['CVE_ID']}?"
        expected_answer = cve.get('exploitabilityScore', 'No data available')
    elif field == "ImpactScore":
        question = f"What is the impact score of {cve['CVE_ID']}?"
        expected_answer = cve.get('ImpactScore', 'No data available')
    elif field == "BaseScore":
        question = f"What is the base score of {cve['CVE_ID']}?"
        expected_answer = cve.get('BaseScore', 'No data available')
    
    return {
        "question": question,
        "expected_answer": expected_answer
    }

def generate_questions(cve_data, num_questions=5):
    """Generates random questions for the specified number of CVEs."""
    selected_cves = random.sample(cve_data, num_questions)
    questions = []
    
    for cve in selected_cves:
        # Generate questions for each required field
        questions.append(generate_question(cve, "PublishedDate"))
        questions.append(generate_question(cve, "Description"))
        questions.append(generate_question(cve, "ExploitabilityScore"))
        questions.append(generate_question(cve, "ImpactScore"))
        questions.append(generate_question(cve, "BaseScore"))

    return questions

def save_questions_to_file(questions, output_file):
    """Saves the generated questions to a JSON file."""
    with open(output_file, 'w', encoding='utf-8') as file:
        json.dump(questions, file, indent=4)

# Load CVE data from a file
cve_file = [
      'nvdcve-1.1-2024_updated.json',
      'nvdcve-1.1-2023_updated.json',
 'nvdcve-1.1-2022_updated.json',
 'nvdcve-1.1-2021_updated.json',
 'nvdcve-1.1-2020_updated.json',
 'nvdcve-1.1-2019_updated.json',
 'nvdcve-1.1-2018_updated.json',
 'nvdcve-1.1-2017_updated.json',
 'nvdcve-1.1-2016_updated.json',
 'nvdcve-1.1-2015_updated.json',
 'nvdcve-1.1-2014_updated.json',
 'nvdcve-1.1-2013_updated.json',
 'nvdcve-1.1-2012_updated.json',
 'nvdcve-1.1-2011_updated.json',
 'nvdcve-1.1-2010_updated.json',
 'nvdcve-1.1-2009_updated.json',
 'nvdcve-1.1-2008_updated.json',
 'nvdcve-1.1-2007_updated.json',
 'nvdcve-1.1-2006_updated.json',
 'nvdcve-1.1-2005_updated.json',
 'nvdcve-1.1-2004_updated.json',
 'nvdcve-1.1-2003_updated.json',
 'nvdcve-1.1-2002_updated.json'
              
              ]  # Your CVE JSON file path
cve_data = load_cve_data(cve_file)

# Generate questions from random CVE IDs
questions = generate_questions(cve_data, num_questions=5)

# Save the generated questions to a JSON file
output_file = 'questions1.json'
save_questions_to_file(questions, output_file)

print(f"Questions have been generated and saved to {output_file}")


Questions have been generated and saved to questions1.json


In [28]:
import json
import random

def load_cve_data(cve_file):
    """Loads CVE data from a JSON file."""
    with open(cve_file, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data

def get_nested_value(data, keys, default="data not available"):
    """Retrieves a nested value from a dictionary given a list of keys."""
    for key in keys:
        data = data.get(key, {})
    return data if data else default

def generate_question(cve, field):
    """Generates a question and expected answer for a given field of the CVE."""
    if field == "PublishedDate":
        question = f"What is the published date of {cve['CVE_ID']}?"
        expected_answer = cve.get('PublishedDate', 'data not available')
    elif field == "Description":
        question = f"What is the description of {cve['CVE_ID']}?"
        expected_answer = cve.get('Description', 'data not available')
    elif field == "ExploitabilityScore":
        # Retrieve ExploitabilityScore from the nested structure
        expected_answer = get_nested_value(cve, ['Impact', 'baseMetricV3', 'exploitabilityScore'])
        question = f"What is the exploitability score of {cve['CVE_ID']}?"
    elif field == "ImpactScore":
        # Retrieve ImpactScore from the nested structure
        expected_answer = get_nested_value(cve, ['Impact', 'baseMetricV3', 'impactScore'])
        question = f"What is the impact score of {cve['CVE_ID']}?"
    elif field == "BaseScore":
        # Retrieve BaseScore from the nested structure
        expected_answer = get_nested_value(cve, ['Impact', 'baseMetricV3', 'cvssV3', 'baseScore'])
        question = f"What is the base score of {cve['CVE_ID']}?"
    
    return {
        "question": question,
        "expected_answer": expected_answer
    }

def generate_questions(cve_data, num_questions=5):
    """Generates random questions for the specified number of CVEs."""
    selected_cves = random.sample(cve_data, num_questions)
    questions = []
    
    for cve in selected_cves:
        # Generate questions for each required field
        questions.append(generate_question(cve, "PublishedDate"))
        questions.append(generate_question(cve, "Description"))
        questions.append(generate_question(cve, "ExploitabilityScore"))
        questions.append(generate_question(cve, "ImpactScore"))
        questions.append(generate_question(cve, "BaseScore"))

    return questions

def save_questions_to_file(questions, output_file):
    """Saves the generated questions to a JSON file."""
    with open(output_file, 'w', encoding='utf-8') as file:
        json.dump(questions, file, indent=4)

# Load CVE data from a file
cve_file = 'nvdcve-1.1-2024_updated.json'  # Your CVE JSON file path
cve_data = load_cve_data(cve_file)

# Generate questions from random CVE IDs
questions = generate_questions(cve_data, num_questions=5)

# Save the generated questions to a JSON file
output_file = 'questions1.json'
save_questions_to_file(questions, output_file)

print(f"Questions have been generated and saved to {output_file}")


Questions have been generated and saved to questions1.json
