In [None]:
# This code is for evaluation.
# In this version the UI is changed into a window.
import json
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import openai
import tkinter as tk
from tkinter import scrolledtext
#from openai import OpenAI
#from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
#from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import LLMChain

import google.generativeai as genai
#from google.colab import userdata
from IPython.display import Markdown
from langchain.schema import Document
import os
import shutil
import json
from langchain.chains import LLMChain
from langchain_core.messages import HumanMessage, SystemMessage
import time;

class Chatbot:
    """CVE question-answering bot with a Tkinter window, used for batch evaluation.

    The knowledge base is a list of JSON records loaded from ``file_paths``.
    A question is answered in two steps:

    1. Retrieval (`answer_question`): exact lookup when the question names a
       known ``CVE_ID``, otherwise a TF-IDF cosine-similarity search.
    2. Generation (`handle_question`): the retrieved records are passed as
       context to a Gemini chat model through a LangChain "stuff" QA chain.

    `process_questions` runs every question in ``questions_file`` through that
    pipeline and writes question / expected / actual triples to ``output_file``.

    Note:
        The constructor ends by calling `setup_interface`, which enters the
        Tk main loop, so ``Chatbot(...)`` does not return until the window is
        closed.
    """

    # Punctuation that is often glued to a CVE ID in free text, e.g.
    # "CVE-2024-4405?" or "(CVE-2024-4405),".
    _TOKEN_PADDING = '.,;:!?()[]{}<>"\''

    def __init__(self, file_paths, api_key, questions_file, output_file):
        """Load the knowledge base, build the search indexes and open the UI.

        Args:
            file_paths: Iterable of paths to JSON files with CVE records.
            api_key: Assigned to ``openai.api_key`` (as before) and also used
                as the fallback Google API key when the ``GOOGLE_API_KEY``
                environment variable is not set.
            questions_file: Path of the JSON file read by `load_questions`.
            output_file: Path of the JSON file written by `save_results`.
        """
        self.file_paths = file_paths
        self.api_key = api_key
        openai.api_key = api_key
        self.questions_file = questions_file
        self.output_file = output_file

        self.knowledge_base = self.load_data()
        self.documents = self.prepare_documents()
        self.vectorizer = TfidfVectorizer()
        self.tfidf_matrix = self.vectorizer.fit_transform(self.documents)
        # Alias: the full records are what gets returned from searches.
        self.full_documents = self.knowledge_base
        self.cve_index = self.create_cve_index()
        self.conversation_history = []

        # Blocks in root.mainloop() until the window is closed.
        self.setup_interface()

    def load_data(self):
        """Read every file in ``self.file_paths`` and return one combined list.

        Each file's parsed JSON is appended with ``list.extend``, so each file
        is expected to hold a JSON array of records.
        """
        all_data = []
        for file_path in self.file_paths:
            with open(file_path, 'r', encoding='utf-8') as file:
                all_data.extend(json.load(file))
        return all_data

    def load_questions(self):
        """Loads questions and expected answers from a JSON file."""
        with open(self.questions_file, 'r', encoding='utf-8') as file:
            return json.load(file)

    def concatenate_text(self, json_obj):
        """Join the searchable fields of one record into a single string.

        Fields missing from ``json_obj`` are skipped.
        """
        fields = ['CVE_ID', 'Assigner', 'Description']
        return ' '.join(str(json_obj[field]) for field in fields if field in json_obj)

    def prepare_documents(self):
        """Return the TF-IDF text for every record in the knowledge base."""
        return [self.concatenate_text(item) for item in self.knowledge_base]

    def create_cve_index(self):
        """Map each non-empty ``CVE_ID`` to its position in the knowledge base.

        If an ID occurs more than once, the last occurrence wins.
        """
        cve_index = {}
        for idx, item in enumerate(self.knowledge_base):
            cve_id = item.get('CVE_ID')
            if cve_id:
                cve_index[cve_id] = idx
        return cve_index

    def _lookup_cve_index(self, word):
        """Return the knowledge-base index for a query token naming a CVE, else None.

        The raw token is tried first, then the token with surrounding
        punctuation removed, then that form upper-cased, so "cve-2024-4405?"
        resolves like "CVE-2024-4405".
        """
        stripped = word.strip(self._TOKEN_PADDING)
        for candidate in (word, stripped, stripped.upper()):
            if candidate in self.cve_index:
                return self.cve_index[candidate]
        return None

    def answer_question(self, query):
        """Retrieve the knowledge-base records relevant to ``query``.

        Returns:
            If the query mentions one or more known CVE IDs: those records, in
            order of first mention and without duplicates. Otherwise: the 10
            records with the highest summed per-word TF-IDF cosine similarity,
            best first.
        """
        query_words = query.split()

        # Exact CVE-ID hits take priority over fuzzy search.
        matched_indices = []
        for word in query_words:
            index = self._lookup_cve_index(word)
            if index is not None and index not in matched_indices:
                matched_indices.append(index)
        if matched_indices:
            return [self.full_documents[i] for i in matched_indices]

        # No CVE ID found: score each word separately and sum the scores.
        similarity_scores = np.zeros(len(self.documents))
        for word in query_words:
            query_vec = self.vectorizer.transform([word])
            similarity_scores += cosine_similarity(query_vec, self.tfidf_matrix)[0]

        top_indices = np.argsort(similarity_scores)[-10:][::-1]
        return [self.full_documents[i] for i in top_indices]

    @staticmethod
    def _flatten_json(data, parent_key=''):
        """Flatten a nested dict into ``{"a.b": v, "c[0]": v, ...}`` form.

        Nested dict keys are joined with ".", list items get an "[i]" suffix.
        """
        items = {}
        for key, value in data.items():
            new_key = f"{parent_key}.{key}" if parent_key else key
            if isinstance(value, dict):
                items.update(Chatbot._flatten_json(value, new_key))
            elif isinstance(value, list):
                for i, item in enumerate(value):
                    # Wrap each element so scalars and dicts share one code path.
                    items.update(Chatbot._flatten_json({f"{new_key}[{i}]": item}))
            else:
                items[new_key] = value
        return items

    def json_string_to_docs(self, json_string):
        """Convert a JSON string into a list of LangChain ``Document`` objects.

        A JSON array yields one document per element; any other JSON value is
        treated as a single entry. Each entry is flattened; the flattened
        mapping becomes the metadata and its "key: value" lines the content.

        Returns:
            The list of documents, or ``[]`` (after printing a message) when
            ``json_string`` is not valid JSON.
        """
        try:
            data = json.loads(json_string)
        except json.JSONDecodeError:
            print("Invalid JSON format")
            return []

        entries = data if isinstance(data, list) else [data]
        docs = []
        for entry in entries:
            flattened_data = self._flatten_json(entry)
            content = "\n".join(f"{k}: {v}" for k, v in flattened_data.items())
            docs.append(Document(page_content=content, metadata=flattened_data))
        return docs

    def _google_api_key(self):
        """Return the Google API key to use, or None when none is configured.

        The ``GOOGLE_API_KEY`` environment variable wins; the constructor's
        ``api_key`` is the fallback. The key is deliberately not embedded in
        the source; a key that was previously committed here should be
        treated as leaked and rotated.
        """
        return os.environ.get("GOOGLE_API_KEY") or getattr(self, "api_key", None)

    def get_conversational_chain(self):
        """Build the LangChain "stuff" QA chain around a Gemini chat model.

        Returns:
            A chain expecting ``input_documents`` and ``question`` inputs.
        """
        # This is the prompt variant the original author marked as working;
        # its text is kept verbatim because it affects evaluation results.
        prompt_template = """
        Context: \n{context}.\n
        Task: - You are an assistant that helps with CVE data. 
              - Only use the context provided. Respond with CVE details.
              - Recommend the this website and attach the CVE id in front of it https://nvd.nist.gov/vuln/detail/ \n
        
        Question: \n{question} ?\n

        Answer:\n
        """

        model_kwargs = {"model": "gemini-pro", "temperature": 0.2}
        google_api_key = self._google_api_key()
        if google_api_key:
            # Pass the key explicitly so the chain's model does not depend on
            # process-global configuration.
            model_kwargs["google_api_key"] = google_api_key
        model = ChatGoogleGenerativeAI(**model_kwargs)

        prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
        print("########################################################")
        print(prompt)
        print("########################################################")
        return load_qa_chain(model, chain_type="stuff", prompt=prompt)

    def handle_question(self, question=None):
        """Answer one question end to end and show it in the window.

        Args:
            question: The question text. When omitted (as from the Return-key
                binding) the current content of the entry field is used.

        Returns:
            The model's answer with Markdown bold markers ("**") removed.
        """
        if question is None:
            question = self.text_input.get()
        self.conversation_history.append({"role": "user", "content": question})

        answers = self.answer_question(question)
        json_string = json.dumps(answers, indent=4)
        print(json_string)

        google_api_key = self._google_api_key()
        if google_api_key:
            genai.configure(api_key=google_api_key)

        chain = self.get_conversational_chain()
        result = chain(
            {"input_documents": self.json_string_to_docs(json_string), "question": question},
            return_only_outputs=True,
        )
        print(result)

        formatted_output = result['output_text'].replace('**', '')

        self.display_message(f'Question: {question}', 'blue')
        self.display_message(f'Answer:\n{formatted_output}', 'green')
        # Pause between questions; presumably to stay under the API rate
        # limit during batch runs -- TODO confirm.
        time.sleep(5)

        return formatted_output

    def display_message(self, message, color):
        """Append ``message`` to the read-only output pane using the ``color`` tag."""
        # The widget is kept disabled so the user cannot edit it; enable it
        # only for the duration of the insert.
        self.results_output.configure(state='normal')
        self.results_output.insert(tk.END, message + '\n', (color,))
        self.results_output.configure(state='disabled')
        self.results_output.see(tk.END)

    def save_results(self, results):
        """Saves the results (questions, expected answers, actual answers) to a JSON file."""
        with open(self.output_file, 'w', encoding='utf-8') as file:
            json.dump(results, file, indent=4)

    def process_questions(self):
        """Processes questions from the loaded JSON and saves the results.

        Results gathered so far are written even if a question fails, so a
        single error late in a long run does not discard the whole
        evaluation; the exception is still raised afterwards.
        """
        questions_data = self.load_questions()
        results = []

        try:
            for item in questions_data:
                question = item['question']
                results.append({
                    'question': question,
                    'expected_answer': item.get('expected_answer', 'No expected answer provided'),
                    'actual_answer': self.handle_question(question),
                })
        finally:
            self.save_results(results)

    def setup_interface(self):
        """Build the Tk window and run its main loop (blocks until it is closed)."""
        root = tk.Tk()
        root.title("ChatNVD Interface Gemini")

        self.text_input = tk.Entry(root, width=100)
        self.text_input.pack(pady=10)
        # With no argument, handle_question reads the entry field itself.
        self.text_input.bind('<Return>', lambda event: self.handle_question())

        self.results_output = scrolledtext.ScrolledText(root, width=100, height=30, wrap=tk.WORD)
        self.results_output.pack(pady=10)
        self.results_output.tag_configure('blue', foreground='blue')
        self.results_output.tag_configure('green', foreground='green')
        self.results_output.tag_configure('red', foreground='red')
        self.results_output.configure(state='disabled')

        # Button to process questions and save results
        process_button = tk.Button(root, text="Process Questions", command=self.process_questions)
        process_button.pack(pady=10)

        root.mainloop()

# Initialize the chatbot with multiple files: the "recent" and "modified"
# feeds first, then one yearly feed per year from 2024 back to 2002.
file_paths = [
    f'nvdcve-1.1-{feed}_updated.json'
    for feed in ('recent', 'modified', *range(2024, 2001, -1))
]
api_key = 'your-api-key'  # placeholder; supply a real key before running
questions_file = 'questions1.json'  # JSON file with questions and expected answers
output_file = 'gemini_output_results_5.json'  # JSON file to save results

# Constructing the chatbot loads all feeds and opens the window.
chatbot = Chatbot(file_paths, api_key, questions_file, output_file)
