<a href="https://colab.research.google.com/github/ArturPap/chatbot_AI/blob/main/chatbot_AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader
import torch
import sqlite3
import logging
from typing import List, Dict, Any
import asyncio

# Root logging setup: INFO and above, each record prefixed with its
# timestamp and severity. The module-level logger is shared by the code below.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

class AIPoweredChatbot:
    """Chatbot that combines document retrieval with a causal language model.

    Retrieved passages (when documents have been loaded) are prepended to the
    user's message as context, the language model generates a reply, and every
    exchange is recorded in a SQLite ``conversations`` table.
    """

    def __init__(self, model_name: str = "distilgpt2", db_path: str = "chatbot_data.db"):
        """Load the tokenizer, model and embeddings, and prepare the database.

        Args:
            model_name: Hugging Face identifier of the causal LM to load.
            db_path: Path of the SQLite file used for conversation history.

        Raises:
            Exception: Any failure during setup is logged and re-raised.
        """
        try:
            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = AutoModelForCausalLM.from_pretrained(model_name).to(self.device)
            self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
            # No index until load_documents() is called; generation then runs without context.
            self.vector_store = None
            self.db_path = db_path
            self._initialize_database()
            logger.info("Chatbot initialized successfully")
        except Exception as e:
            logger.error("Initialization failed: %s", e)
            raise

    def _execute_write(self, sql: str, params: tuple = ()) -> None:
        """Run one write statement in its own transaction, then close the connection."""
        conn = sqlite3.connect(self.db_path)
        try:
            # The connection's context manager commits on success and rolls back
            # on error, but it does NOT close the connection - hence the finally.
            with conn:
                conn.execute(sql, params)
        finally:
            conn.close()

    def _initialize_database(self) -> None:
        """Create the ``conversations`` table if it does not exist yet.

        Raises:
            sqlite3.Error: Logged and re-raised when the table cannot be created.
        """
        try:
            self._execute_write("""
                CREATE TABLE IF NOT EXISTS conversations (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    user_input TEXT,
                    response TEXT,
                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
                )
            """)
        except sqlite3.Error as e:
            logger.error("Database initialization failed: %s", e)
            raise

    def load_documents(self, file_path: str) -> None:
        """Load a PDF and build the vector index used for retrieval.

        Each page with extractable text becomes one indexed entry. A previously
        built index is replaced only after the new one was created successfully.

        Args:
            file_path: Path of the PDF file to index.

        Raises:
            ValueError: If the PDF yields no text at all.
            Exception: Any loader or indexing failure is logged and re-raised.
        """
        try:
            loader = PyPDFLoader(file_path)
            documents = loader.load()
            # Skip pages without text (e.g. scanned images): they add nothing to retrieval.
            texts = [doc.page_content for doc in documents if doc.page_content.strip()]
            if not texts:
                # Fail early with a clear message instead of an opaque error
                # from building an index out of zero texts.
                raise ValueError(f"No extractable text found in {file_path}")
            self.vector_store = FAISS.from_texts(texts, self.embeddings)
            logger.info("Loaded and indexed %d documents", len(texts))
        except Exception as e:
            logger.error("Document loading failed: %s", e)
            raise

    async def generate_response(self, user_input: str, max_length: int = 100) -> str:
        """Generate a reply to ``user_input``, using retrieved context if available.

        Note: the model call is synchronous, so this coroutine blocks the event
        loop while the model is generating.

        Args:
            user_input: The user's message.
            max_length: Maximum number of tokens to generate for the reply. The
                prompt does not count against this limit.

        Returns:
            The generated reply without the prompt, or a fixed error message if
            anything goes wrong (the error is logged, never raised).
        """
        try:
            if max_length < 1:
                raise ValueError("max_length must be at least 1")

            # Retrieve relevant context using RAG
            context = ""
            if self.vector_store is not None:
                docs = self.vector_store.similarity_search(user_input, k=3)
                context = " ".join(doc.page_content for doc in docs)

            # Prepare input for the model
            prompt = f"Context: {context}\nUser: {user_input}\nAssistant: "
            encoded = self.tokenizer(prompt, return_tensors="pt")

            # Prompt plus reply must fit into the model's context window. If the
            # prompt is too long, drop its oldest tokens (the start of the
            # retrieved context) so the question and the "Assistant:" cue survive.
            new_tokens = max_length
            window = getattr(self.model.config, "max_position_embeddings", None)
            if window and window > 1:
                new_tokens = min(new_tokens, window - 1)
                prompt_budget = window - new_tokens
                encoded = {name: tensor[:, -prompt_budget:] for name, tensor in encoded.items()}
            inputs = {name: tensor.to(self.device) for name, tensor in encoded.items()}
            prompt_len = inputs["input_ids"].shape[1]

            # Generate response. `max_new_tokens` is used because `max_length`
            # in generate() also counts the prompt tokens, which a long context
            # would exhaust before a single reply token is produced.
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=new_tokens,
                num_return_sequences=1,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                # The default model has no pad token; fall back to EOS explicitly.
                pad_token_id=self.tokenizer.eos_token_id,
            )

            # The output sequence starts with the prompt; keep only the new tokens.
            response = self.tokenizer.decode(
                outputs[0][prompt_len:], skip_special_tokens=True
            ).strip()

            # Store conversation in database
            self._store_conversation(user_input, response)

            logger.info("Response generated successfully")
            return response
        except Exception as e:
            # The error is swallowed on purpose, so keep the traceback in the log.
            logger.exception("Response generation failed: %s", e)
            return "An error occurred while generating the response."

    def _store_conversation(self, user_input: str, response: str) -> None:
        """Persist one exchange; failures are logged and otherwise ignored.

        Storing history is best-effort: a database problem must not prevent the
        reply from being returned to the user.
        """
        try:
            self._execute_write(
                "INSERT INTO conversations (user_input, response) VALUES (?, ?)",
                (user_input, response),
            )
        except sqlite3.Error as e:
            logger.error("Failed to store conversation: %s", e)

    def analyze_conversation_history(self) -> pd.DataFrame:
        """Return the stored conversations as a DataFrame with derived columns.

        Adds ``response_length`` (character count of each response) and parses
        ``timestamp`` into datetimes.

        Returns:
            One row per stored exchange, ordered by ``id``. On any failure the
            error is logged and an empty DataFrame (no columns) is returned.
        """
        try:
            conn = sqlite3.connect(self.db_path)
            try:
                df = pd.read_sql_query("SELECT * FROM conversations ORDER BY id", conn)
            finally:
                conn.close()

            # Basic analysis
            df['response_length'] = df['response'].str.len()
            df['timestamp'] = pd.to_datetime(df['timestamp'])

            logger.info("Conversation history analyzed")
            return df
        except Exception as e:
            logger.exception("Conversation analysis failed: %s", e)
            return pd.DataFrame()

async def main():
    """Demonstrate the chatbot: load documents, answer once, summarize history.

    A missing or unreadable knowledge-base PDF is not fatal; the chatbot then
    answers without retrieved context.
    """
    chatbot = AIPoweredChatbot()

    # Load sample documents (assuming a PDF file exists)
    try:
        chatbot.load_documents("sample_knowledge_base.pdf")
    except Exception as e:
        logger.warning("Could not load documents: %s", e)

    # Example interaction
    user_input = "What is machine learning?"
    response = await chatbot.generate_response(user_input)
    print(f"User: {user_input}")
    print(f"Assistant: {response}")

    # Analyze conversation history
    analysis = chatbot.analyze_conversation_history()
    print("\nConversation Analysis:")
    if analysis.empty:
        # analyze_conversation_history() returns a column-less DataFrame on
        # failure, and describe() raises ValueError on such a frame.
        print("No conversation history available.")
    else:
        print(analysis.describe())

if __name__ == "__main__":
    # asyncio.run() raises RuntimeError when an event loop is already running,
    # which is the case inside Jupyter/Colab cells. Reuse that loop there and
    # only start a new one when running as a plain script.
    try:
        _running_loop = asyncio.get_running_loop()
    except RuntimeError:
        _running_loop = None

    if _running_loop is None:
        asyncio.run(main())
    else:
        # Keep a reference so the task is not garbage-collected before it finishes.
        _main_task = _running_loop.create_task(main())