# In [None]:
import nltk
import spacy
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import string
import time
import sqlite3
import faiss
import string
import re
from collections import defaultdict
from textblob import Word, TextBlob


# In [None]:
faq_dict = {
    "What is Artificial Intelligence (AI)?": "Artificial Intelligence refers to the simulation of human intelligence in machines that are programmed to think and learn. It encompasses tasks such as learning, reasoning, problem-solving, perception, and language understanding.",
    "How does AI work?": "AI systems work by processing large amounts of data, recognizing patterns, and making decisions based on statistical analysis. They utilize algorithms and models to perform tasks without explicit human instructions.",
    "What are the different types of AI?": "AI can be categorized into Narrow AI (designed for specific tasks), General AI (possessing human-like cognitive abilities), and Superintelligent AI (surpassing human intelligence). Currently, most AI applications are Narrow AI.",
    "What is Machine Learning?": "Machine Learning is a subset of AI that enables machines to learn from data and improve their performance over time without being explicitly programmed.",
    "What is Deep Learning?": "Deep Learning is a subset of Machine Learning that uses neural networks with many layers (deep neural networks) to analyze various factors of data. It is particularly effective in processing unstructured data like images and speech.",
    "What are Neural Networks?": "Neural Networks are algorithms inspired by the human brain's structure, designed to recognize patterns. They consist of interconnected nodes (neurons) that process data by assigning weights and biases.",
    "What is Natural Language Processing (NLP)?": "NLP is a branch of AI that focuses on the interaction between computers and humans through natural language. It enables machines to understand, interpret, and generate human language.",
    "What are the applications of AI?": "AI is used in various fields, including healthcare (diagnostics), finance (fraud detection), transportation (autonomous vehicles), customer service (chatbots), and more.",
    "What are the benefits of AI?": "AI can increase efficiency, reduce human error, automate repetitive tasks, and provide insights through data analysis, leading to better decision-making.",
    "What are the risks of AI?": "Potential risks include job displacement, ethical concerns, biases in decision-making, security vulnerabilities, and the possibility of unintended consequences.",
    "What is the future of AI?": "The future of AI involves advancements in machine learning algorithms, increased integration into various industries, and ongoing discussions about ethical and societal implications.",
    "Can AI replace human jobs?": "AI can automate certain tasks, potentially displacing some jobs. However, it also creates new opportunities and roles that require human skills.",
    "Is AI dangerous?": "AI poses risks if misused or poorly designed. Ensuring ethical guidelines and safety measures can mitigate potential dangers.",
    "How is AI used in business?": "Businesses use AI for data analysis, customer service automation, predictive analytics, and enhancing operational efficiency.",
    "What is the difference between AI and Machine Learning?": "AI is the broader concept of machines being able to carry out tasks in a way that we would consider 'smart,' while Machine Learning is a subset of AI that allows machines to learn from data.",
    "What is an AI algorithm?": "An AI algorithm is a set of rules and statistical techniques used by AI systems to learn from data and make decisions.",
    "What is the Turing Test?": "The Turing Test, proposed by Alan Turing, is a measure of a machine's ability to exhibit intelligent behavior indistinguishable from that of a human.",
    "What is a chatbot?": "A chatbot is an AI program designed to simulate conversation with human users, especially over the Internet.",
    "What is computer vision?": "Computer vision is an AI field that enables machines to interpret and process visual information from the world, such as images and videos.",
    "What is reinforcement learning?": "Reinforcement learning is a type of Machine Learning where an agent learns to make decisions by performing actions and receiving rewards or penalties.",
    "What is supervised learning?": "Supervised learning is a Machine Learning approach where models are trained on labeled datasets, meaning the input data is paired with the correct output.",
    "What is unsupervised learning?": "Unsupervised learning is a Machine Learning approach where models are trained on unlabeled data, and the system tries to learn patterns and structure from the input.",
    "What is semi-supervised learning?": "Semi-supervised learning is a Machine Learning approach that combines a small amount of labeled data with a large amount of unlabeled data during training.",
    "What is transfer learning?": "Transfer learning is a Machine Learning technique where a model developed for one task is reused as the starting point for a model on a second task.",
    "What is an AI model?": "An AI model is a mathematical representation of a real-world process, created by training algorithms on data to recognize patterns and make predictions.",
    "What is data mining?": "Data mining is the process of discovering patterns and knowledge from large amounts of data, often involving methods at the intersection of machine learning, statistics, and database systems.",
    "What is big data?": "Big data refers to extremely large datasets that may be analyzed computationally to reveal patterns, trends, and associations.",
    "What is the difference between AI and automation?": "Automation involves using technology to perform tasks without human intervention, often following predefined rules, whereas AI involves machines exhibiting intelligence and making decisions based on data.",
    "What is cognitive computing?": "Cognitive computing refers to systems that mimic human thought processes to solve complex problems, often involving self-learning through data mining and pattern recognition.",
    "What is the singularity in AI?": "The singularity refers to a hypothetical point in the future when AI surpasses human intelligence, leading to unforeseeable changes to human civilization.",
    "What is explainable AI?": "Explainable AI refers to methods and techniques in AI that make the results and workings of AI systems understandable to humans.",
    "What is ethical AI?": "Ethical AI involves designing and deploying AI systems in a manner that aligns with moral principles and values, ensuring fairness, transparency, and accountability.",
    "What is bias in AI?": "Bias in AI refers to systematic errors that result in unfair outcomes, often due to biased data or flawed algorithms.",
    "What is an AI-powered recommendation system?": "An AI-powered recommendation system suggests products, services, or information to users based on data analysis and predictive algorithms.",
    "What is the role of AI in cybersecurity?": "AI is used in cybersecurity to detect and respond to threats, analyze patterns, and enhance the overall security posture of organizations.",
    "What is the impact of AI on privacy?": "AI can impact privacy by processing large amounts of personal data, raising concerns about data protection and the potential for misuse.",
    "What is a neural network's activation function?": "An activation function in a neural network determines the output of a neuron, introducing non-linearities into the model, which allows it to learn complex patterns.",
    "What is overfitting in Machine Learning?": "Overfitting occurs when a Machine Learning model learns the training data too well, including noise and outliers, resulting in poor performance on new data.",
    "What is Computer Vision?": "Computer Vision is a field of artificial intelligence that enables computers and systems to interpret and process visual information from the world, such as images and videos, in a manner similar to human vision.",
    "How does Computer Vision differ from image processing?": "While image processing involves manipulating pixel values to enhance or transform images, Computer Vision focuses on understanding and interpreting the content of images to make decisions or extract information.",
    "What are common applications of Computer Vision?": "Common applications include facial recognition, autonomous vehicles, medical image analysis, surveillance, augmented reality, and quality inspection in manufacturing.",
    "What is the role of Machine Learning in Computer Vision?": "Machine Learning, especially deep learning, provides models and algorithms that enable Computer Vision systems to learn patterns and features from data, improving their ability to recognize and interpret visual information.",
    "What are Convolutional Neural Networks (CNNs)?": "CNNs are a class of deep neural networks specifically designed to process pixel data. They are widely used in Computer Vision tasks due to their ability to automatically and adaptively learn spatial hierarchies of features.",
    "What is image segmentation?": "Image segmentation is the process of partitioning an image into multiple segments or regions to simplify its representation and make it more meaningful for analysis.",
    "What is object detection?": "Object detection involves identifying and locating objects within an image or video. It provides both the classification and the position of objects.",
    "What is the difference between object detection and object recognition?": "Object recognition identifies what an object is, while object detection identifies what and where an object is within an image or scene.",
    "What is feature extraction in Computer Vision?": "Feature extraction involves identifying and isolating various desired portions or shapes (features) of an image to simplify the analysis of complex data.",
    "What are some popular Computer Vision libraries?": "Popular libraries include OpenCV, TensorFlow, Keras, PyTorch, and MATLAB's Image Processing Toolbox.",
    "What is the importance of datasets in Computer Vision?": "Datasets are crucial for training and evaluating Computer Vision models. High-quality, diverse datasets enable models to learn effectively and generalize to new data.",
    "What challenges does Computer Vision face?": "Challenges include varying lighting conditions, occlusions, viewpoint variations, real-time processing requirements, and the need for large annotated datasets.",
    "How has artificial intelligence (AI) changed Computer Vision?": "AI, particularly deep learning, has significantly improved the accuracy and capabilities of Computer Vision systems by enabling automatic feature learning from large datasets.",
    "What is the role of Computer Vision in autonomous vehicles?": "Computer Vision enables autonomous vehicles to perceive and interpret their surroundings, including detecting obstacles, recognizing traffic signs, and understanding lane markings.",
    "What is the difference between Computer Vision and image processing?": "Image processing focuses on enhancing or manipulating images, while Computer Vision aims to understand and interpret the content of images for decision-making.",
    "What is the significance of edge detection in Computer Vision?": "Edge detection is used to identify the boundaries within images, which is essential for object detection, recognition, and segmentation tasks.",
    "What is the role of Computer Vision in healthcare?": "In healthcare, Computer Vision assists in medical imaging analysis, such as detecting tumors, monitoring patient health, and aiding in diagnostics.",
    "What is the impact of Computer Vision on retail?": "Computer Vision enhances retail by enabling applications like automated checkout systems, customer behavior analysis, and inventory management.",
    "What is the role of Computer Vision in agriculture?": "In agriculture, Computer Vision is used for crop monitoring, disease detection, yield estimation, and precision farming practices.",
    "What is the significance of color space in Computer Vision?": "Color space represents the range of colors in an image. Understanding different color spaces (RGB, HSV, etc.) is important for various image processing tasks.",
    "What is the role of Computer Vision in security and surveillance?": "Computer Vision enables automated monitoring, anomaly detection, facial recognition, and activity analysis in security and surveillance systems.",
    "What is the importance of image annotation in Computer Vision?": "Image annotation involves labeling images with metadata, which is crucial for training supervised learning models in tasks like object detection and segmentation.",
    "What is the role of Computer Vision in sports analytics?": "In sports analytics, Computer Vision is used to track player movements, analyze game strategies, and enhance broadcasting with augmented reality.",
    "What is the significance of depth perception in Computer Vision?": "Depth perception allows Computer Vision systems to understand the three-dimensional structure of a scene, which is important for applications like 3D modeling and navigation.",
    "What is the role of Computer Vision in robotics?": "Computer Vision enables robots to perceive their environment, recognize objects, and perform tasks autonomously in dynamic settings.",
    "What is the impact of Computer Vision on manufacturing?": "In manufacturing, Computer Vision is used for quality control, defect detection, and automation of assembly line processes.",
    "What is the significance of optical character recognition (OCR) in Computer Vision?": "OCR is a technology that converts different types of documents, such as scanned paper documents or PDFs, into editable and searchable data.",
    "What is the role of Computer Vision in augmented reality (AR)?": "Computer Vision enables AR systems to understand and interact with the real world by recognizing and tracking objects and environments.",
    "What is the significance of image stitching in Computer Vision?": "Image stitching involves combining multiple images to produce a panorama or a high-resolution image, which is useful in fields like photography and mapping.",
    "What is the role of Computer Vision in environmental monitoring?": "Computer Vision assists in monitoring environmental changes, detecting pollution levels, and assessing disaster impacts through image and video analysis.",
    "What is the significance of facial recognition in Computer Vision?": "Facial recognition is used for identifying or verifying individuals based on their facial features, with applications in security, authentication, and social media.",
    "What is the role of Computer Vision in education?": "In education, Computer Vision can be used for interactive learning experiences, student engagement analysis, and automated grading systems.",
    "What is the significance of image compression in Computer Vision?": "Image compression reduces the file size of images, which is important for efficient storage and transmission without significant loss of quality.",
    "What is the role of Computer Vision in entertainment?": "Computer Vision enhances entertainment through applications like motion capture, special effects, and content recommendation systems.",
    "What is the significance of gesture recognition in Computer Vision?": "Gesture recognition allows computers to interpret human gestures, enabling intuitive human-computer interaction in applications like gaming and virtual reality.",
    "What is the role of Computer Vision in transportation?": "In transportation, Computer Vision is used for traffic monitoring, license plate recognition, and enhancing driver assistance systems.",
    "What is the significance of pattern recognition in Computer Vision?": "Pattern recognition involves identifying regularities in data, which is fundamental for tasks like image classification and object detection.",
    "What is the role of Computer Vision in archaeology?": "Computer Vision aids in archaeological research by analyzing site images, reconstructing artifacts, and preserving cultural heritage through digital means.",
    "What is Deep Learning?": "Deep Learning is a subset of machine learning that uses multilayered neural networks to simulate the complex decision-making power of the human brain. It enables systems to learn from vast amounts of data.",
    "How does Deep Learning differ from traditional Machine Learning?": "While traditional machine learning relies on manual feature extraction, deep learning automatically discovers representations from data, making it particularly effective for unstructured data like images and text.",
    "What are neural networks?": "Neural networks are computational models inspired by the human brain, consisting of interconnected units (neurons) that process information using dynamic state responses to external inputs.",
    "What is a perceptron?": "A perceptron is the simplest type of artificial neural network, consisting of a single neuron with adjustable weights and a threshold activation function.",
    "What are activation functions?": "Activation functions introduce non-linearity into the neural network, allowing it to learn complex patterns. Common examples include sigmoid, tanh, and ReLU functions.",
    "What is backpropagation?": "Backpropagation is a training algorithm for neural networks that calculates the gradient of the loss function and adjusts the weights to minimize the error.",
    "What is a convolutional neural network (CNN)?": "A CNN is a type of deep neural network primarily used for processing structured grid data like images. It employs convolutional layers to automatically and adaptively learn spatial hierarchies of features.",
    "What is a recurrent neural network (RNN)?": "An RNN is a class of neural networks where connections form cycles, allowing information to persist. They are particularly effective for sequential data like time series or natural language.",
    "What is overfitting in deep learning?": "Overfitting occurs when a model learns not only the underlying patterns in the training data but also the noise, leading to poor performance on unseen data.",
    "How can overfitting be prevented?": "Techniques to prevent overfitting include regularization methods (like L1 and L2), dropout, early stopping, and using more training data.",
    "What is dropout in neural networks?": "Dropout is a regularization technique where, during training, randomly selected neurons are ignored (dropped out), forcing the network to learn more robust features.",
    "What is a learning rate?": "The learning rate is a hyperparameter that controls how much to change the model in response to the estimated error each time the model weights are updated.",
    "What is batch normalization?": "Batch normalization is a technique to improve the training of deep neural networks by normalizing the inputs of each layer to have a consistent distribution, which can lead to faster convergence.",
    "What is a loss function?": "A loss function measures how well the model's predictions match the actual data. It guides the optimization process during training.",
    "What is an epoch in deep learning?": "An epoch refers to one complete pass through the entire training dataset during the training process.",
    "What is meant by transfer learning?": "Transfer learning involves leveraging a pre-trained model on a new, but related, problem, allowing for faster training and often improved performance.",
    "What is transfer learning?": "Transfer learning involves leveraging a pre-trained model on a new, but related, problem, allowing for faster training and often improved performance.",
    "What are autoencoders?": "Autoencoders are neural networks designed to learn efficient codings of input data by training the network to ignore noise and reconstruct the input data.",
    "What is reinforcement learning?": "Reinforcement learning is a type of machine learning where an agent learns to make decisions by performing actions and receiving feedback in the form of rewards or penalties.",
    "What is unsupervised learning?": "Unsupervised learning involves modeling the underlying structure of data without labeled responses, often for clustering or dimensionality reduction.",
    "What is supervised learning?": "Supervised learning involves learning a function that maps an input to an output based on example input-output pairs, using labeled training data.",
    "What is semi-supervised learning?": "Semi-supervised learning combines both labeled and unlabeled data during training, useful when labeled data is scarce.",
    "What is a generative adversarial network (GAN)?": "A GAN is a class of neural networks where two networks, a generator and a discriminator, compete against each other, leading to the generation of new, synthetic data samples.",
    "What is the vanishing gradient problem?": "The vanishing gradient problem occurs when gradients become too small during backpropagation, hindering the effective training of deep networks.",
    "What is the exploding gradient problem?": "The exploding gradient problem occurs when gradients become too large during backpropagation, leading to unstable training processes.",
    "What is a long short-term memory (LSTM) network?": "An LSTM is a type of RNN designed to overcome the vanishing gradient problem by maintaining long-term dependencies, making it effective for sequential data.",
    "What is a gated recurrent unit (GRU)?": "A GRU is a type of RNN similar to LSTM but with a simplified architecture, often leading to faster training while maintaining performance.",
    "What is a softmax function?": "The softmax function converts a vector of values into a probability distribution, commonly used in the output layer of classification models.",
    "What is a Boltzmann machine?": "A Boltzmann machine is a type of stochastic recurrent neural network capable of learning internal representations and solving combinatorial optimization problems.",
    "What is a restricted Boltzmann machine (RBM)?": "An RBM is a simplified version of a Boltzmann machine with a bipartite structure, commonly used for dimensionality reduction and collaborative filtering.",
    "What is a deep belief network (DBN)?": "A DBN is a stack of RBMs where each layer learns to represent the features detected in the previous layer, useful for unsupervised learning tasks.",
    "What is a variational autoencoder (VAE)?": "A VAE is a type of autoencoder that learns a probabilistic representation of the input data, allowing for the generation of new data samples.",
    "What is an embedding in deep learning?": "An embedding is a learned representation of data, often used to convert categorical data into continuous vectors for use in machine learning models.",
    "What is the role of GPUs in deep learning?": "GPUs accelerate deep learning computations by parallelizing operations, significantly reducing training times for large models.",
    "What is model hyperparameter tuning?": "Hyperparameter tuning involves selecting the optimal set of hyperparameters for a learning algorithm to improve model performance.",
    "What is cross-validation?": "Cross-validation is a technique for assessing how a model generalizes to an independent dataset, commonly used to prevent overfitting.",
    "What is data augmentation?": "Data augmentation involves creating modified versions of the original data to increase the diversity of the training set, improving model robustness.",
    "What is a learning rate scheduler?": "A learning rate scheduler adjusts the learning rate during training, often to improve convergence and performance.",
    "What is a weight initialization technique?": "Weight initialization techniques set the initial values of weights in a neural network, impacting the convergence and performance of the model.",
    "Why is it called the Internet of Things?": "It's called the Internet of Things because it extends internet connectivity beyond traditional devices like computers and smartphones to a diverse range of objects and environments.",
    "What is an IoT solution?": "An IoT solution is a combination of devices or other data sources, outfitted with sensors and internet-connected hardware, to securely report information back to an IoT platform.",
    "What is an IoT Proof of Concept (PoC)?": "An IoT PoC is a demonstration to validate the feasibility and potential of an IoT solution before full-scale implementation.",
    "What is an IoT cloud platform?": "An IoT cloud platform is a suite of cloud-based services that enables the development, deployment, and management of IoT applications and devices.",
    "What is AWS IoT Core?": "AWS IoT Core is a managed cloud service that allows connected devices to interact securely with cloud applications and other devices.",
    "How does AWS IoT Core work?": "AWS IoT Core works by providing secure communication and data processing between IoT devices and the AWS cloud, enabling real-time data collection and analysis.",
    "What is the difference between IoT and M2M?": "While both IoT and Machine-to-Machine (M2M) involve devices communicating, IoT typically refers to a broader network of connected devices using IP-based communication, whereas M2M often involves direct communication between devices using non-IP protocols.",
    "What are common applications of IoT?": "Common applications include smart homes, wearable health monitors, smart cities, industrial automation, and connected vehicles.",
    "What are the security concerns with IoT?": "Security concerns include data privacy, unauthorized access, device tampering, and ensuring secure communication channels.",
    "How is data collected in IoT?": "Data is collected through sensors embedded in IoT devices, which gather information from their environment and transmit it for processing.",
    "What is edge computing in IoT?": "Edge computing refers to processing data near the source of data generation (i.e., IoT devices) rather than relying solely on centralized cloud servers, reducing latency and bandwidth use.",
    "What is the role of AI in IoT?": "AI enables IoT systems to analyze large volumes of data, make predictions, and automate responses, enhancing decision-making and operational efficiency.",
    "How do IoT devices communicate?": "IoT devices communicate using various protocols such as MQTT, HTTP, CoAP, and through connectivity options like Wi-Fi, Bluetooth, Zigbee, and cellular networks.",
    "What is an IoT gateway?": "An IoT gateway is a physical device or software program that serves as the connection point between IoT devices and the cloud, facilitating data flow between them.",
    "What are the challenges in implementing IoT?": "Challenges include ensuring interoperability, managing data security, handling large data volumes, and providing reliable connectivity.",
    "What is the importance of standardization in IoT?": "Standardization ensures compatibility and interoperability between different IoT devices and systems, facilitating seamless integration and communication.",
    "How does IoT impact daily life?": "IoT impacts daily life by enabling smart home automation, improving healthcare monitoring, enhancing energy efficiency, and providing real-time information through connected devices.",
    "What is the future of IoT?": "The future of IoT includes increased device connectivity, advancements in AI integration, enhanced security measures, and broader adoption across various industries.",
    "What is an IoT ecosystem?": "An IoT ecosystem comprises all the components that enable IoT solutions, including devices, networks, cloud platforms, applications, and users.",
    "How does IoT contribute to smart cities?": "IoT contributes to smart cities by enabling intelligent traffic management, efficient energy usage, enhanced public safety, and improved urban planning through data-driven insights.",
    "What are IoT sensors?": "IoT sensors are devices that detect and measure physical properties from the environment, such as temperature, humidity, motion, and transmit this data for analysis.",
    "What is the role of big data in IoT?": "Big data technologies process and analyze the vast amounts of data generated by IoT devices, uncovering patterns, trends, and insights to inform decision-making.",
    "How do IoT devices handle power consumption?": "IoT devices handle power consumption through energy-efficient hardware design, low-power communication protocols, and power management strategies to extend battery life.",
    "What is the significance of IPv6 in IoT?": "IPv6 provides a larger address space, essential for accommodating the vast number of IoT devices connecting to the internet.",
    "How does IoT integrate with existing IT infrastructure?": "IoT integrates with existing IT infrastructure through APIs, middleware, and compatible communication protocols, allowing seamless data exchange and system interoperability.",
    "What is the impact of IoT on supply chain management?": "IoT impacts supply chain management by providing real-time tracking of goods, optimizing inventory levels, improving logistics, and enhancing transparency.",
    "What are wearable IoT devices?": "Wearable IoT devices are smart electronic devices worn on the body, such as fitness trackers and smartwatches, that collect and transmit data related to health and activity.",
    "How is IoT used in agriculture?": "IoT is used in agriculture through precision farming techniques, utilizing sensors and data analytics to monitor soil conditions, crop health, and optimize resource usage.",
    "What is the role of blockchain in IoT?": "Blockchain provides a secure and transparent method for recording transactions and data exchanges between IoT devices, enhancing security and trust.",
    "How do IoT devices update their firmware?": "IoT devices update their firmware through over-the-air (OTA) updates, allowing remote deployment of new software versions to fix bugs or add features.",
    "What is the significance of data analytics in IoT?": "Data analytics in IoT enables the extraction of meaningful insights from the vast data generated by devices, supporting informed decision-making and predictive analysis.",
    "How does IoT impact energy management?": "IoT impacts energy management by enabling smart grids, real-time monitoring of energy consumption, and automation of energy-saving measures.",
    "What are the ethical considerations in IoT?": "Ethical considerations include data privacy, consent for data collection, surveillance concerns, and ensuring equitable access to IoT technologies.",
    "How does IoT support remote monitoring?": "IoT supports remote monitoring by allowing devices to transmit data over the internet, enabling users to monitor and control systems from distant locations.",
    "What is the role of machine learning in IoT?": "Machine learning in IoT involves analyzing data patterns to make predictions, detect anomalies, and automate decision-making processes.",
    "What is Natural Language Processing (NLP)?": "Natural Language Processing (NLP) is a subfield of artificial intelligence that focuses on the interaction between computers and human languages, enabling machines to understand, interpret, and generate human language.",
    "What are the main applications of NLP?": "NLP is used in various applications including sentiment analysis, machine translation, chatbots, speech recognition, text summarization, and information extraction.",
    "What is tokenization in NLP?": "Tokenization is the process of breaking down text into smaller units, such as words or sentences, to facilitate analysis.",
    "What is the difference between stemming and lemmatization?": "Stemming reduces words to their root form by removing suffixes, while lemmatization reduces words to their base or dictionary form, considering the context.",
    "What is sentiment analysis?": "Sentiment analysis is the process of determining the emotional tone or attitude expressed in a piece of text, often categorized as positive, negative, or neutral.",
    "What are stop words in NLP?": "Stop words are common words like 'and', 'the', and 'is' that are often removed from text during preprocessing because they carry little meaningful information.",
    "What is part-of-speech tagging?": "Part-of-speech tagging involves labeling words in a text with their corresponding part of speech, such as noun, verb, adjective, etc.",
    "What is named entity recognition (NER)?": "Named Entity Recognition is the process of identifying and classifying proper nouns in text, such as names of people, organizations, locations, dates, and other specific terms.",
    "What is the Bag of Words model?": "The Bag of Words model represents text data by counting the frequency of words, disregarding grammar and word order.",
    "What is TF-IDF?": "Term Frequency-Inverse Document Frequency (TF-IDF) is a statistical measure used to evaluate the importance of a word in a document relative to a collection of documents.",
    "What is word embedding?": "Word embedding is a technique where words are represented as dense vectors in a continuous vector space, capturing semantic relationships between words.",
    "What is the difference between NLP and NLU?": "Natural Language Processing (NLP) encompasses the entire process of analyzing and generating human language, while Natural Language Understanding (NLU) focuses specifically on comprehending the meaning and intent behind the text.",
    "What are n-grams in NLP?": "N-grams are contiguous sequences of n items (words or characters) from a given text, used to analyze the context and predict the next item in sequences.",
    "What is a language model?": "A language model is a probabilistic model that predicts the likelihood of a sequence of words, aiding in tasks like speech recognition and text generation.",
    "What is the role of machine learning in NLP?": "Machine learning enables NLP systems to learn patterns and make predictions from data, improving tasks like classification, translation, and summarization.",
    "What are some common NLP libraries?": "Popular NLP libraries include NLTK, spaCy, Gensim, and Transformers.",
    "What is the difference between rule-based and statistical NLP?": "Rule-based NLP relies on handcrafted linguistic rules, while statistical NLP uses probabilistic models trained on large datasets to learn patterns.",
    "What is transfer learning in NLP?": "Transfer learning involves pre-training a model on a large corpus and fine-tuning it on a specific task, improving performance with less data.",
    "What is the importance of context in NLP?": "Context helps disambiguate words with multiple meanings and improves the accuracy of language models by considering surrounding words and sentences.",
    "What are some challenges in NLP?": "Challenges include handling ambiguity, understanding context, managing diverse languages and dialects, and processing idiomatic expressions.",
    "What is coreference resolution?": "Coreference resolution is the task of identifying expressions that refer to the same entity in a text, such as pronouns and their antecedents.",
    "What is the difference between syntactic and semantic analysis?": "Syntactic analysis focuses on the grammatical structure of sentences, while semantic analysis aims to understand the meaning conveyed by the text.",
    "What is a corpus in NLP?": "A corpus is a large and structured set of texts used for training and evaluating NLP models.",
    "What is the role of deep learning in NLP?": "Deep learning models, such as neural networks, have significantly improved NLP tasks by capturing complex patterns and representations in language data.",
    "What is the difference between speech recognition and NLP?": "Speech recognition converts spoken language into text, while NLP processes and analyzes the text to derive meaning and insights.",
    "What is text summarization?": "Text summarization is the process of creating a concise and coherent summary of a longer text document.",
    "What is information retrieval in NLP?": "Information retrieval involves finding relevant documents or pieces of information from a large repository based on user queries.",
    "What is topic modeling?": "Topic modeling is an unsupervised learning technique used to identify themes or topics within a collection of documents.",
    "What is the role of attention mechanisms in NLP?": "Attention mechanisms allow models to focus on relevant parts of the input sequence, improving performance in tasks like translation and summarization.",
    "What is the difference between supervised and unsupervised learning in NLP?": "Supervised learning uses labeled data to train models, while unsupervised learning finds patterns in unlabeled data.",
    "What is the significance of word sense disambiguation?": "Word sense disambiguation is the process of determining the correct meaning of a word in context, which is crucial for accurate language understanding.",
    "What is a chatbot?": "A chatbot is a software application that uses NLP to simulate human-like conversations with users.",
    "What is the role of reinforcement learning in NLP?": "Reinforcement learning can be used to optimize dialogue systems and other interactive NLP applications by learning from user feedback.",
    "What is the difference between online and batch learning in NLP?": "Online learning updates the model incrementally as new data arrives, while batch learning updates the model using the entire dataset at once.",
    "What is the importance of evaluation metrics in NLP?": "Evaluation metrics, such as accuracy, precision, recall, and F1-score, are essential for assessing the performance of NLP models.",
    "What is the role of syntax trees in NLP?": "Syntax trees represent the grammatical structure of sentences, aiding in parsing and understanding complex sentence constructions.",
    "What is zero-shot learning in NLP?": "Zero-shot learning enables models to handle tasks or classes they were not explicitly trained on by leveraging knowledge from related tasks.",
    "What is the difference between extractive and abstractive summarization?": "Extractive summarization selects sentences directly from the source text, while abstractive summarization generates new sentences that capture the essence of the original text.",
    "What is the role of ontologies in NLP?": "Ontologies provide structured representations of knowledge domains, facilitating better understanding and reasoning in NLP systems."
}

def initialize_database(db_path="faq.db", faqs=None):
    """Create the ``faqs`` table if needed and seed it with predefined FAQs.

    Seeding uses ``INSERT OR IGNORE`` against the ``UNIQUE`` question column,
    so calling this repeatedly neither duplicates a question nor overwrites
    an answer that is already stored.

    Args:
        db_path: SQLite database file to open. Defaults to ``"faq.db"``.
        faqs: Mapping of question -> answer used for seeding. Defaults to the
            module-level ``faq_dict``.
    """
    if faqs is None:
        faqs = faq_dict

    conn = sqlite3.connect(db_path)
    try:
        # Create table if it doesn't exist
        conn.execute('''CREATE TABLE IF NOT EXISTS faqs (
                            id INTEGER PRIMARY KEY AUTOINCREMENT,
                            question TEXT UNIQUE,
                            answer TEXT)''')

        # Insert predefined FAQs only if they are not already in the database
        conn.executemany(
            "INSERT OR IGNORE INTO faqs (question, answer) VALUES (?, ?)",
            faqs.items(),
        )
        conn.commit()
    finally:
        # Close even when a statement fails so the connection is not leaked.
        conn.close()

    print(" Debug: FAQs initialized in the database.")

# Seed the database when this cell/module runs, so the `faqs` table exists
# before build_faiss_index() selects from it.
initialize_database()

In [None]:
# Load the spaCy "en_core_web_sm" pipeline once at module level.
# NOTE(review): `nlp` is not referenced by any function visible in this part
# of the file — confirm it is still needed.
nlp = spacy.load("en_core_web_sm")

def preprocess_text(text):
    """Normalize text before it is vectorized.

    The input is lowercased, then every character that is not an ASCII
    lowercase letter, an ASCII digit, or whitespace is deleted. Characters
    are removed rather than replaced by a space, so "TF-IDF" becomes "tfidf".
    """
    lowered = text.lower()
    # Drop punctuation and any other special characters in one pass.
    return re.sub(r'[^a-z0-9\s]', '', lowered)

In [None]:
# Single place where a connection to the FAQ database is opened
def get_db_connection():
    """Open and return a new connection to ``faq.db``.

    The connection is created with ``check_same_thread=False``, which
    disables sqlite3's same-thread check. Closing it is the caller's job.
    """
    connection = sqlite3.connect("faq.db", check_same_thread=False)
    return connection

# Use this function everywhere you connect to the database
def add_faq_to_db(question, answer):
    """Insert one question/answer pair into the ``faqs`` table.

    A ``sqlite3.IntegrityError`` raised by the insert or the commit is caught
    and reported as a duplicate question instead of propagating. The
    connection is closed whether or not the insert succeeded.
    """
    db = get_db_connection()
    cur = db.cursor()
    try:
        cur.execute(
            "INSERT INTO faqs (question, answer) VALUES (?, ?)",
            (question, answer),
        )
        db.commit()
    except sqlite3.IntegrityError:
        print("Debug: Duplicate question detected. Skipping insertion.")
    else:
        # Only reached once the row has been committed.
        print("Debug: New FAQ added to database.")
    finally:
        db.close()


In [None]:
# Initialize NLTK utilities
# NOTE(review): stop_words and lemmatizer are not referenced by the functions
# visible in this part of the file — confirm they are still needed.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()


# Module-level retrieval state: written by build_faiss_index(), read by get_faq_answer()
faiss_index = None  # Global FAISS index; stays None until build_faiss_index() has stored one
faiss_faqs = []  # (id, question, answer) rows, in the same order as the vectors added to the index
faiss_vectorizer = None  # TF-IDF vectorizer fitted on the preprocessed questions

def build_faiss_index():
    """(Re)build the FAISS index over every question stored in ``faq.db``.

    Reads all rows of the ``faqs`` table, turns the preprocessed questions
    into TF-IDF vectors, loads them into a flat L2 FAISS index and publishes
    the result through the module-level globals ``faiss_index``,
    ``faiss_faqs`` and ``faiss_vectorizer``.

    Returns:
        The tuple ``(index, faqs, vectorizer)``. When the table is empty the
        tuple is ``(None, [], None)`` and the globals are reset to the same
        values, so a previously built index cannot outlive the data it was
        built from.
    """
    global faiss_index, faiss_faqs, faiss_vectorizer

    # Fetch every FAQ row; close the connection even if the query fails.
    conn = sqlite3.connect("faq.db")
    try:
        faqs = conn.execute("SELECT id, question, answer FROM faqs").fetchall()
    finally:
        conn.close()

    # Handle case where no FAQs exist
    if not faqs:
        print(" Debug: No FAQs found in the database.")
        # Keep module state in agreement with the value returned to the caller.
        faiss_index, faiss_faqs, faiss_vectorizer = None, [], None
        return faiss_index, faiss_faqs, faiss_vectorizer

    # Preprocess questions (row layout is (id, question, answer))
    preprocessed_questions = [preprocess_text(q) for _, q, _ in faqs]

    # Convert text to dense float32 TF-IDF vectors, the dtype handed to FAISS
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(preprocessed_questions).toarray().astype(np.float32)

    # One index dimension per vocabulary term
    index = faiss.IndexFlatL2(tfidf_matrix.shape[1])
    index.add(tfidf_matrix)

    # Publish all three together so index positions line up with `faqs`
    faiss_index, faiss_faqs, faiss_vectorizer = index, faqs, vectorizer

    print(f" Debug: FAISS index built with {len(faqs)} FAQs.")
    return faiss_index, faiss_faqs, faiss_vectorizer

# Build the index eagerly when this cell/module runs; get_faq_answer() would
# otherwise build it on first use, when it finds faiss_index still None.
build_faiss_index()


def get_faq_answer(user_query, min_score=0.6):
    """Return the stored answer whose question best matches ``user_query``.

    The query is preprocessed and vectorized with the fitted TF-IDF
    vectorizer, then the single nearest question is looked up in the FAISS
    index. The reported distance ``d`` is mapped to a score ``1 / (1 + d)``,
    so an exact match scores 1.0.

    If the score is below ``min_score`` the user is asked on stdin to supply
    an answer. A non-empty reply other than "skip" is stored in ``faq.db``
    under ``user_query`` and the index is rebuilt.

    Args:
        user_query: The question as typed by the user.
        min_score: Minimum score required to trust the best match.
            Defaults to 0.6.

    Returns:
        The matched answer text, or an apology string when nothing can be
        searched, FAISS reports no neighbour, or the score is too low. The
        apology is returned on a low score even when a new answer was taught.
    """
    if faiss_index is None or faiss_vectorizer is None:
        build_faiss_index()
    if faiss_index is None or faiss_vectorizer is None:
        # Still nothing to search (e.g. the faqs table is empty): answer
        # gracefully instead of calling .transform() on None.
        return "I'm sorry, but I couldn't find an answer."

    processed_query = preprocess_text(user_query)
    query_vector = faiss_vectorizer.transform([processed_query]).toarray().astype(np.float32)

    distances, match_index = faiss_index.search(query_vector, 1)
    best_match = match_index[0][0]

    # FAISS signals "no neighbour found" with index -1
    if best_match == -1:
        return "I'm sorry, but I couldn't find an answer."

    best_score = 1 / (1 + distances[0][0])  # Normalize similarity score

    print(f"Debug: Best match similarity score: {best_score:.2f}")

    if best_score >= min_score:
        return faiss_faqs[best_match][2]

    # Lower confidence means we ask for help
    print("Chatbot: I'm not sure. Can you teach me?")
    new_answer = input("Type the correct answer or 'skip': ").strip()

    # A blank reply is treated like "skip" so empty answers are never stored.
    if new_answer and new_answer.lower() != "skip":
        stored = False
        conn = sqlite3.connect("faq.db")
        try:
            conn.execute(
                "INSERT INTO faqs (question, answer) VALUES (?, ?)",
                (user_query, new_answer),
            )
            conn.commit()
            stored = True
        except sqlite3.IntegrityError:
            # `question` is UNIQUE; a repeat must not crash the chat loop.
            print("Debug: Duplicate question detected. Skipping insertion.")
        finally:
            conn.close()

        if stored:
            print("Chatbot: Thanks! I've learned something new.")
            build_faiss_index()  # Update FAISS with new data

    return "I'm sorry, but I couldn't find an exact answer."







# Example chatbot interaction
def chatbot():
    """Run the interactive FAQ loop on stdin/stdout until the user leaves.

    Each turn reads one line, skips blank input, stops on "exit", "quit" or
    "bye" (case-insensitive), prints a reassurance when TextBlob reports a
    polarity below -0.2, and then prints the result of ``get_faq_answer``.

    End of input or Ctrl-C at any prompt, including the teach prompt inside
    ``get_faq_answer``, ends the session with a goodbye instead of a
    traceback.
    """
    print("Hello! I am your AI-powered FAQ chatbot. Ask me anything.")

    # The whole loop is guarded because get_faq_answer() may prompt as well.
    try:
        while True:
            user_input = input("You: ").strip()
            if not user_input:
                print("Chatbot: Please type something!")
                continue

            if user_input.lower() in ("exit", "quit", "bye"):
                print("Chatbot: Goodbye!")
                break

            # Sentiment analysis is best-effort: a failure is reported and
            # the question is still answered.
            try:
                sentiment = TextBlob(user_input).sentiment.polarity
            except Exception as e:
                print(f"Chatbot: (Debug: Sentiment analysis failed - {str(e)})")
            else:
                if sentiment < -0.2:
                    print("Chatbot: I sense frustration. I'm here to help!")

            # Get response from FAQ system
            response = get_faq_answer(user_input)
            print(f"Chatbot: {response}")
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the user interrupted: leave the loop cleanly.
        print("\nChatbot: Goodbye!")

# Start the interactive loop only when this file runs as the main program,
# not when it is imported as a module.
if __name__ == "__main__":
    chatbot()
