In [None]:
import logging
import os
import re

import fitz  # PyMuPDF
import pandas as pd
import wikipedia
from dotenv import find_dotenv, load_dotenv
from flask import Flask, request
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import YoutubeLoader  # For YouTube transcripts
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from serpapi import GoogleSearch
from slack_bolt import App
from slack_bolt.adapter.flask import SlackRequestHandler
from slack_sdk import WebClient

# Populate os.environ from the nearest .env file, if one can be located
load_dotenv(find_dotenv())

# Slack credentials are mandatory: a missing variable raises KeyError at startup
SLACK_BOT_TOKEN = os.environ["SLACK_BOT_TOKEN"]
SLACK_SIGNING_SECRET = os.environ["SLACK_SIGNING_SECRET"]

# Bolt app handles Slack events; Flask exposes it over HTTP through the adapter
app = App(
    signing_secret=SLACK_SIGNING_SECRET,
    token=SLACK_BOT_TOKEN,
)
flask_app = Flask(__name__)
handler = SlackRequestHandler(app)

# Chat model used by the default conversational chain
chat = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.5,
)

# Conversation memory: a sliding window over the 20 most recent exchanges.
# ConversationBufferMemory does not have a `k` option, so the window variant
# is used to actually bound the history.
memory = ConversationBufferWindowMemory(memory_key="conversation", return_messages=True, k=20)

# Define the default system message template for Alphie
system_template = """
You are Alphie, a highly capable AI assistant.
Your main roles are to:
- Answer user questions with detailed and accurate information.
- Assist with various tasks, including programming, debugging, writing, data analysis, and more.
- Provide suggestions for personal and professional development.
- Engage in meaningful conversations, maintaining context to ensure continuity.

Specific guidance:
- Be friendly, helpful, and responsive.
- Provide useful advice and clear explanations.
- For technical queries, offer code examples when appropriate.
- For general knowledge, be informative and concise.
- For questions about specific documents or content, extract relevant information and mention sources.
- If unsure, ask for clarification or suggest additional resources.
"""

system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)
human_template = "User says: {user_input}."
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

# The placeholder name must match the memory's `memory_key`; without it the
# stored history is loaded but never rendered into the prompt sent to the model.
chat_prompt = ChatPromptTemplate.from_messages(
    [
        system_message_prompt,
        MessagesPlaceholder(variable_name="conversation"),
        human_message_prompt,
    ]
)

# Conversational chain with memory
chain = LLMChain(llm=chat, prompt=chat_prompt, memory=memory)

# Create a function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the text of every page in a PDF, one newline after each page.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``fitz.open``.

    Returns:
        The concatenated page texts. Pages that yield no text are skipped;
        every other page's text is followed by a single newline.
    """
    # The context manager closes the document even if a page fails to parse.
    with fitz.open(pdf_path) as reader:
        page_texts = [page.get_text() for page in reader]
    return "".join(text + "\n" for text in page_texts if text)

# Normalize whitespace in a piece of text
def clean_text(text):
    """Collapse every run of whitespace to one space and trim both ends."""
    # str.split() with no arguments splits on whitespace runs and drops
    # leading/trailing whitespace, so re-joining with single spaces yields
    # the collapsed, stripped text.
    words = text.split()
    return " ".join(words)

# Function to analyze PDF content
def analyze_pdf(pdf_path, question):
    """Answer a question using the most relevant passages of a PDF.

    The PDF text is extracted, whitespace-normalized, split into overlapping
    chunks, embedded into a FAISS index, and the chunks most similar to the
    question are handed to the chat model as context.

    Args:
        pdf_path: Path of the PDF file to read.
        question: The question to answer from the document.

    Returns:
        The model's answer as returned by ``LLMChain.run``.

    Raises:
        ValueError: If no text could be extracted from the PDF.
    """
    raw_text = extract_text_from_pdf(pdf_path)
    cleaned_text = clean_text(raw_text)
    if not cleaned_text:
        # Nothing to index (e.g. a scanned PDF without a text layer).
        raise ValueError(f"No extractable text found in PDF: {pdf_path}")

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1500,
        chunk_overlap=400,
        separators=["\n", " ", "."],
    )
    texts = text_splitter.split_text(cleaned_text)

    # split_text returns plain strings, so the index is built with from_texts;
    # from_documents expects Document objects.
    embeddings = OpenAIEmbeddings()
    docsearch = FAISS.from_texts(texts, embeddings)
    docs = docsearch.similarity_search(question, k=7)

    # The chain needs a prompt that declares the {docs} and {question}
    # variables passed to run(); the bare system message template has neither.
    qa_prompt = ChatPromptTemplate.from_messages(
        [
            system_message_prompt,
            HumanMessagePromptTemplate.from_template(
                "Use the following document excerpts to answer the question.\n\n"
                "Excerpts: {docs}\n\n"
                "Question: {question}"
            ),
        ]
    )
    qa_chain = LLMChain(llm=ChatOpenAI(), prompt=qa_prompt)

    answer = qa_chain.run(
        {"docs": " ".join(doc.page_content for doc in docs), "question": question}
    )
    return answer

# Create a function to build a database from a YouTube video
def create_db_from_youtube_video_url(video_url):
    """Build a FAISS index over the transcript of a YouTube video.

    Args:
        video_url: URL of the video, passed to ``YoutubeLoader.from_youtube_url``.

    Returns:
        A FAISS vector store built from 2000-character transcript chunks
        (100 characters of overlap) embedded with ``OpenAIEmbeddings``.

    Raises:
        ValueError: If the loader returns no transcript content.
    """
    loader = YoutubeLoader.from_youtube_url(video_url)
    transcript = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
    docs = text_splitter.split_documents(transcript)

    # Calls to this function have failed with a bare "list index out of range";
    # presumably the index was being built from an empty document list.
    # Fail early with a message that names the real problem.
    if not docs:
        raise ValueError(f"No transcript content available for video: {video_url}")

    db = FAISS.from_documents(docs, OpenAIEmbeddings())
    return db

# Answer a question from an indexed YouTube transcript
def get_response_from_query(db, query, k=4):
    """Answer ``query`` using the ``k`` transcript chunks most similar to it.

    Args:
        db: Vector store exposing ``similarity_search(query, k=...)``.
        query: The user's question.
        k: Number of chunks to retrieve as context (default 4).

    Returns:
        The chat model's answer as returned by ``LLMChain.run``.
    """
    # Retrieve the context first, then flatten it into one string
    matches = db.similarity_search(query, k=k)
    transcript_context = " ".join(match.page_content for match in matches)

    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.2)

    transcript_instructions = """
You are a helpful assistant who can answer questions about YouTube videos based on their transcripts: {docs}.
Use factual information from the transcript to answer questions.
If you don't have enough information, say "I don't know".
"""
    question_format = "Answer the following question: {question}."

    prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessagePromptTemplate.from_template(transcript_instructions),
            HumanMessagePromptTemplate.from_template(question_format),
        ]
    )

    answer_chain = LLMChain(llm=llm, prompt=prompt)
    return answer_chain.run({"question": query, "docs": transcript_context})

# Slack event handler for message events
@app.event("message")
def handle_message_events(event, say):
    """Reply to a Slack message: greeting, YouTube link, or general chat.

    Args:
        event: The Slack event payload; only its ``"text"`` field is read.
        say: Callable used to post a reply.

    Behavior:
        * Events with no text are ignored.
        * "hi" / "hello" / "hey" (any letter case) get a fixed greeting.
        * A message containing a YouTube watch URL triggers transcript
          indexing; failures are logged and reported to the user.
        * Anything else is answered by the conversational chain.
    """
    # Some events carry no top-level text (presumably edits/deletions);
    # there is nothing to answer in that case.
    user_text = (event.get("text") or "").strip()
    if not user_text:
        return

    # Respond to greetings (case-insensitive comparison only; the original
    # text is preserved for everything else)
    if user_text.lower() in ["hi", "hello", "hey"]:
        say("Hello! I'm Alphie, your friendly AI assistant at the New Mexico Tax & Rev. How can I help you today?")
        return

    # Check if the message contains a YouTube link. The search runs on the
    # original text: YouTube video IDs are case-sensitive, so lowercasing the
    # message first would corrupt the ID.
    youtube_pattern = r"https?://(?:www\.)?youtube\.com/watch\?v=[\w-]+"
    youtube_links = re.findall(youtube_pattern, user_text, flags=re.IGNORECASE)

    if youtube_links:
        video_url = youtube_links[0]
        try:
            # NOTE(review): the resulting index is not retained anywhere, so
            # follow-up questions are not yet routed to the transcript.
            create_db_from_youtube_video_url(video_url)
        except Exception:
            # Top-level listener boundary: log the failure and tell the user
            # instead of letting the listener crash silently.
            logging.getLogger(__name__).exception(
                "Failed to index YouTube video %s", video_url
            )
            say("Sorry, I couldn't retrieve a transcript for that YouTube video.")
            return
        say("I've extracted the transcript from the provided YouTube video. You can ask questions about it.")
        return

    # Default response for other queries; the original casing is sent so that
    # code, names and acronyms reach the model intact.
    response = chain.run({"user_input": user_text})
    say(response)

# HTTP entry point that Slack posts events to
@flask_app.route("/slack/events", methods=["POST"])
def slack_events():
    """Pass the incoming Flask request to the Slack handler and return its response."""
    slack_response = handler.handle(request)
    return slack_response

# Start the Flask server only when this file is executed as a script.
# An equality test is required here: `in` performs a substring check, which
# would also be true for module names such as "main" or "".
if __name__ == "__main__":
    flask_app.run(port=8080)


  warn_deprecated(


 * Serving Flask app '__main__'
 * Debug mode: off


  warn_deprecated(
 * Running on http://127.0.0.1:8080
Press CTRL+C to quit
127.0.0.1 - - [10/May/2024 15:30:25] "POST /slack/events HTTP/1.1" 200 -
127.0.0.1 - - [10/May/2024 15:30:26] "POST /slack/events HTTP/1.1" 200 -
127.0.0.1 - - [10/May/2024 15:31:33] "POST /slack/events HTTP/1.1" 200 -
  warn_deprecated(
Failed to run listener function (error: list index out of range)
Traceback (most recent call last):
  File "C:\Users\asifr\AppData\Roaming\Python\Python311\site-packages\slack_bolt\listener\thread_runner.py", line 120, in run_ack_function_asynchronously
    listener.run_ack_function(request=request, response=response)
  File "C:\Users\asifr\AppData\Roaming\Python\Python311\site-packages\slack_bolt\listener\custom_listener.py", line 50, in run_ack_function
    return self.ack_function(
           ^^^^^^^^^^^^^^^^^^
  File "C:\Users\asifr\AppData\Local\Temp\ipykernel_29868\1188181078.py", line 154, in handle_message_events
    db = create_db_from_youtube_video_url(video_url)
    