## <span style = "color:brown"> <span style = "font-family: Times New Roman"> Event Planner Database Management Tool </span></span>
***
### <span style = "font-family: Times New Roman"> Don't remember the florist you booked for an event project last year? No worries: store your past projects and you will never have to worry about forgetting the details.</span>

****



In [None]:
# Uncomment the lines below to install all the required libraries

#!pip install langchain langchain-openai langchain-chroma matplotlib scikit-learn numpy plotly python-dotenv openai gradio
#!pip install -U langchain_huggingface

In [None]:
#libraries import
import os
import glob

from dotenv import load_dotenv
#Required for using API
from openai import OpenAI
#Required when programming using Ollama

import gradio as gr
#must for best user interface experience

''' 📂 Document Handling & Preprocessing'''
from langchain.document_loaders import DirectoryLoader, TextLoader
#DirectoryLoader: Loads multiple text documents from a specified directory.
#TextLoader: Loads individual text files into LangChain's document format.

from langchain.text_splitter import CharacterTextSplitter
#Splits large text documents into smaller chunks based on character count & Helps with token limitations in LLMs 

from langchain.schema import Document
#Represents a single text document with metadata. Use Case: Helps in organizing data before processing

''' 🔎 Embeddings & LLM Models'''
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
#OpenAIEmbeddings: Converts text into numerical embeddings using OpenAI’s models, allowing vector searches
#ChatOpenAI: Provides access to OpenAI’s chat-based LLM (e.g., GPT-4).

from langchain_chroma import Chroma
#Chroma is a vector database for storing and retrieving embeddings.

''' 📊 Data Visualization'''
import matplotlib.pyplot as plt
#python lib for plotting graphs; `plt` must be the pyplot submodule, not the matplotlib package root

from sklearn.manifold import TSNE
#t-SNE (t-distributed Stochastic Neighbor Embedding) is a dimensionality reduction technique.Use Case: Helps visualize high-dimensional embeddings in 2D/3D

import numpy as np
#numerical computation

import plotly.graph_objects as go
#Used for interactive 3D visualization of embeddings.

''' 🧠 Conversational Memory & Chains'''
from langchain.memory import ConversationBufferMemory
#Stores conversation history in a buffer i.e. Enables context retention

from langchain.chains import ConversationalRetrievalChain
#A chain that combines conversation memory with a retrieval-based model. Use Case: Powers RAG (Retrieval-Augmented Generation) chatbots.

from langchain.embeddings import HuggingFaceEmbeddings
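#Loads pretrained embeddings from Hugging Face models (e.g., BERT, SentenceTransformers). NOTE: the same name is imported again from langchain_huggingface just below, and that later import replaces this binding.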

from langchain_huggingface import HuggingFaceEmbeddings
#using Langchain hugging-face embeddings to save on cost

from langchain_core.callbacks import StdOutCallbackHandler
#to get the thinking behind the answers

In [None]:
# --- Model selection ---------------------------------------------------
# OpenAI option: uncomment the three lines below (the API key is loaded
# from a file called .env) and comment out the Ollama assignment at the
# bottom of this cell.
# MODEL = "gpt-4o-mini"
# load_dotenv()
# os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')

# Ollama option (active by default): name of the locally served model.
MODEL = "llama3.2"

In [None]:
''' Saving the data in chunks: Ensure that your main folder has sub folders containing .log files inside it'''

# Directory in which the Chroma vector store is persisted.
db_name = "vector_db"

# One sub-folder of database/ per document type. Every entry is later handed
# to a directory loader, so plain files sitting directly in database/
# (e.g. .DS_Store, a README) are filtered out here.
folders = [path for path in glob.glob("database/*") if os.path.isdir(path)]

def add_metadata(doc, doc_type):
    """Tag *doc* with its document type and return the same object.

    The value is stored under the ``"doc_type"`` key of ``doc.metadata``;
    the document is modified in place, not copied.
    """
    doc.metadata.update({"doc_type": doc_type})
    return doc

# Keyword arguments forwarded to every TextLoader instance.
text_loader_kwargs = dict(autodetect_encoding=True)

# Collect the .log files of every folder, tagging each document with the
# name of the folder it came from.
documents = []
for folder in folders:
    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(
        folder,
        glob="**/*.log",
        loader_cls=TextLoader,
        loader_kwargs=text_loader_kwargs,
    )
    folder_docs = loader.load()
    documents.extend(add_metadata(doc, doc_type) for doc in folder_docs)

# Split into overlapping chunks of at most ~1000 characters.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)

print(f"Total number of chunks: {len(chunks)}")
print(f"Document types found: {set(doc.metadata['doc_type'] for doc in documents)}")

In [None]:
''' Saving all the chunks in a vector database'''
# Alternative if you have an OpenAI API key:
#embeddings = OpenAIEmbeddings()

# Embedding model loaded through the Hugging Face integration.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Start from a clean slate: drop the collection of a previously persisted store.
if os.path.exists(db_name):
    Chroma(
        persist_directory=db_name,
        embedding_function=embeddings,
    ).delete_collection()

# Embed the chunks and persist them under db_name.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory=db_name,
)
print(f"Vectorstore created with {vectorstore._collection.count()} documents")

In [None]:
# Peek inside the store: how many vectors are there, and how long is each one?
collection = vectorstore._collection
count = collection.count()

# A single stored embedding is enough to read off the dimensionality.
sample_embedding = collection.get(
    limit=1,
    include=["embeddings"],
)["embeddings"][0]
dimensions = len(sample_embedding)
print(f"There are {count:,} vectors with {dimensions:,} dimensions in the vector store")

## <span style = "font-family: Great Vibes, cursive"> 📊 Visualizing our data</span>

### Skip this part if you are not interested, but it's fun to have a look
***

In [None]:
# Prework (with thanks to Jon R for identifying and fixing a bug in this!)

# Pull every stored vector together with its text and metadata.
result = collection.get(include=['embeddings', 'documents', 'metadatas'])
vectors = np.array(result['embeddings'])
documents = result['documents']  # NOTE: from here on `documents` holds plain chunk texts
metadatas = result['metadatas']
doc_types = [metadata['doc_type'] for metadata in metadatas]

# One colour per document type. The two original folders keep their colours;
# any other folder name found in the store gets the next palette colour
# (cycling if there are more types than colours), so no manual update is
# needed when the database gains new sub-folders.
color_by_type = {'wedding_logs': 'blue', 'birthday_logs': 'green'}
extra_palette = ['red', 'orange', 'purple', 'brown', 'pink', 'gray', 'olive', 'cyan']
extra_types = sorted(set(doc_types) - set(color_by_type))
for position, extra_type in enumerate(extra_types):
    color_by_type[extra_type] = extra_palette[position % len(extra_palette)]
colors = [color_by_type[t] for t in doc_types]

In [None]:
# We humans find it easier to visualize things in 2D!
# Reduce the dimensionality of the vectors to 2D using t-SNE
# (t-distributed stochastic neighbor embedding)

# t-SNE requires perplexity < n_samples, so derive the cap from the actual
# number of vectors instead of assuming a fixed sample count.
perplexity_value = min(10, len(vectors) - 1)

tsne = TSNE(n_components=2, perplexity=perplexity_value, random_state=42)
reduced_vectors = tsne.fit_transform(vectors)

# Create the 2D scatter plot
fig = go.Figure(data=[go.Scatter(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    mode='markers',
    marker=dict(size=5, color=colors, opacity=0.8),
    text=[f"Type: {t}<br>Text: {d[:100]}..." for t, d in zip(doc_types, documents)],
    hoverinfo='text'
)])

# Axis titles are set directly on the layout: `scene` only applies to 3D plots.
fig.update_layout(
    title='2D Chroma Vector Store Visualization',
    xaxis_title='x',
    yaxis_title='y',
    width=800,
    height=600,
    margin=dict(r=20, b=10, l=10, t=40)
)

fig.show()

In [None]:
# Let's try 3D!
# t-SNE requires perplexity < n_samples, so derive the cap from the actual
# number of vectors instead of assuming a fixed sample count.
perplexity_value = min(10, len(vectors) - 1)

tsne = TSNE(n_components=3, perplexity=perplexity_value, random_state=42)
reduced_vectors = tsne.fit_transform(vectors)

# Create the 3D scatter plot
fig = go.Figure(data=[go.Scatter3d(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    z=reduced_vectors[:, 2],
    mode='markers',
    marker=dict(size=5, color=colors, opacity=0.8),
    text=[f"Type: {t}<br>Text: {d[:100]}..." for t, d in zip(doc_types, documents)],
    hoverinfo='text'
)])

fig.update_layout(
    title='3D Chroma Vector Store Visualization',
    scene=dict(xaxis_title='x', yaxis_title='y', zaxis_title='z'),
    width=900,
    height=700,
    margin=dict(r=20, b=10, l=10, t=40)
)

fig.show()

## Time to use LangChain to bring it all together

In [None]:
# Create the chat model. For the OpenAI API, uncomment the line below instead.
#llm = ChatOpenAI(temperature=0.7, model_name=MODEL)

# Local Ollama server reached through its OpenAI-compatible endpoint; comment
# this line out if you use the OpenAI API (spoiler: local answers take ages).
# The model name comes from MODEL so it is configured in one place only.
llm = ChatOpenAI(temperature=0.7, model_name=MODEL, base_url='http://localhost:11434/v1', api_key='ollama')

# set up the conversation memory for the chat
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# the retriever is an abstraction over the VectorStore that will be used during RAG
retriever = vectorstore.as_retriever()

# putting it together: set up the conversation chain with the chosen LLM, the vector store and memory
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

In [None]:
# Smoke test: ask the chain one simple question and print its answer.
query = "Who married on 15th June 2024?"
result = conversation_chain.invoke(dict(question=query))
print(result["answer"])

In [None]:
# Fresh memory, so the next conversation starts without the earlier exchange.
memory = ConversationBufferMemory(
    memory_key='chat_history',
    return_messages=True,
)

# Rebuild the chain around the existing LLM and retriever with the new memory.
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

## <span style = "font-family: Times New Roman"> Let's Chat Now!! </span>


In [None]:
# Let's investigate what gets sent behind the scenes, i.e. the reasoning behind the answers

# uncomment to use OpenAI
#llm = ChatOpenAI(temperature=0.7, model_name=MODEL)

# comment out the line below if using OpenAI
# (the model name comes from MODEL so it is configured in one place only)
llm = ChatOpenAI(temperature=0.7, model_name=MODEL, base_url='http://localhost:11434/v1', api_key='ollama')

memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

retriever = vectorstore.as_retriever()

# The stdout callback handler prints what the chain does while it runs.
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory, callbacks=[StdOutCallbackHandler()])

query = "Who had Enchanted Garden theme?"
result = conversation_chain.invoke({"question": query})
answer = result["answer"]
print("\nAnswer:", answer)

In [None]:
# uncomment to use OpenAI
#llm = ChatOpenAI(temperature=0.7, model_name=MODEL)
# comment out the line below if using OpenAI
# (the model name comes from MODEL so it is configured in one place only)
llm = ChatOpenAI(temperature=0.7, model_name=MODEL, base_url='http://localhost:11434/v1', api_key='ollama')

# set up the conversation memory for the chat
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# the retriever is an abstraction over the VectorStore that will be used during RAG; k is how many chunks to use
retriever = vectorstore.as_retriever(search_kwargs={"k": 25})

# putting it together: set up the conversation chain with the chosen LLM, the vector store and memory
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

In [None]:
def chat(question, history):
    """Answer *question* through the module-level ``conversation_chain``.

    ``history`` is accepted to fit the chat-callback signature but is not
    read here; only the question is forwarded to the chain. Returns the
    value stored under the ``"answer"`` key of the chain's result.
    """
    return conversation_chain.invoke({"question": question})["answer"]

In [None]:


# JavaScript snippet handed to gr.Blocks through its `js` argument.
# It defines a function that reloads the page with the `__theme=dark`
# query parameter whenever that parameter is not already set to 'dark'.
force_dark_mode = """
function refresh() {
    const url = new URL(window.location);
    if (url.searchParams.get('__theme') !== 'dark') {
        url.searchParams.set('__theme', 'dark');
        window.location.href = url.href;
    }
}
"""

# Stylesheet handed to gr.Blocks through its `css` argument.
# It imports the 'Great Vibes' web font and styles <h1> elements inside
# .gradio-container as centred, pink-to-white gradient text; the .emoji
# rule unsets the gradient clipping for elements carrying that class.
custom_css = """
     /* Import a stylish script font */
    @import url('https://fonts.googleapis.com/css2?family=Great+Vibes&display=swap');
    
    .gradio-container h1 {
        text-align: center;
        font-size: 60px;
        font-weight: 700;
        font-style: italic;
        font-family: 'Great Vibes', cursive;  /* Elegant script font */
        text-transform: none;
        background: linear-gradient(to right, #ff66b2, #ff99cc, #ffffff); /* Pink-White Gradient */
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        letter-spacing: 3px;        
        text-shadow: 2px 2px 5px rgba(255, 105, 180, 0.4); /* Subtle pink glow */
    }
    .emoji {
        -webkit-background-clip: unset;
        -webkit-text-fill-color: unset;
    }

"""

# Assemble the UI: a styled heading above a chat widget that calls chat().
with gr.Blocks(css=custom_css, js=force_dark_mode) as demo:
    gr.Markdown("# 🧠 Need My Help to Remember?")
    gr.ChatInterface(fn=chat, type="messages")


In [None]:
def launch_chatbot():
    """Close any running instance of ``demo`` and launch it again.

    The app is served on host ``0.0.0.0`` with ``server_port=None`` (the
    port choice is left to Gradio) and a browser tab is opened.
    """
    demo.close()
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": None,  # Gradio will pick a free port
        "inbrowser": True,
    }
    demo.launch(**launch_options)


if __name__ == "__main__":
    launch_chatbot()