<a href="https://colab.research.google.com/github/SaravanaPrakash-1925/SaravanaPrakash-1925/blob/main/TalentWhiz_AI_Unleash_the_Power_of_Talent_Selection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langchain pypdf openai chromadb tiktoken docx2txt

In [None]:
import os
import sys
import warnings
from dotenv import load_dotenv
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import Docx2txtLoader
from langchain.document_loaders import TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import CharacterTextSplitter
from IPython.display import clear_output

# Load settings from the local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI credentials come from - confirm.
load_dotenv('.env')

# Single source of truth for the folder that holds the files to index.
docs_dir = "./docs"

documents = []
# Create a List of Documents from all of our files in the ./docs folder
for file in os.listdir(docs_dir):
    # Match extensions case-insensitively so files such as "CV.PDF" or
    # "Resume.DOCX" are picked up instead of being silently skipped.
    lowered_name = file.lower()
    if lowered_name.endswith(".pdf"):
        loader_cls = PyPDFLoader
    elif lowered_name.endswith((".docx", ".doc")):
        # NOTE(review): '.doc' is routed to Docx2txtLoader exactly as before;
        # legacy binary .doc files may not be readable by it - confirm.
        loader_cls = Docx2txtLoader
    elif lowered_name.endswith(".txt"):
        loader_cls = TextLoader
    else:
        # Unsupported file type: ignore it.
        continue

    loader = loader_cls(os.path.join(docs_dir, file))
    documents.extend(loader.load())

# Nothing to index means every later answer is produced without any source
# material, so make that visible instead of failing (or misbehaving) later.
if not documents:
    warnings.warn(
        "No .pdf, .docx, .doc or .txt content was loaded from " + docs_dir,
        RuntimeWarning,
    )

# Split the documents into smaller chunks
text_splitter = CharacterTextSplitter(chunk_size=1200, chunk_overlap=10)

# Suppress UserWarnings raised while splitting. The filter is scoped to this
# call only: catch_warnings() restores the previous warning configuration on
# exit, so unrelated UserWarnings later in the session are still shown.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=UserWarning)
    documents = text_splitter.split_documents(documents)

# Convert the document chunks to embeddings and save them to the vector store
# (persisted on disk under ./data).
vectordb = Chroma.from_documents(documents, embedding=OpenAIEmbeddings(), persist_directory="./data")
vectordb.persist()

# Build the conversational Q&A chain from its two parts: the chat model that
# writes the answers and a retriever backed by the vector store.
chat_model = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.7)
# search_kwargs is forwarded to the retriever; presumably 'k' is the number of
# chunks fetched per question - confirm against the vector store docs.
doc_retriever = vectordb.as_retriever(search_kwargs={'k': 6})
pdf_qa = ConversationalRetrievalChain.from_llm(
    chat_model,
    retriever=doc_retriever,
    verbose=False,
    return_source_documents=True,
)

# Conversation so far as (question, answer) tuples; it is handed back to the
# chain on every turn so follow-up questions keep their context.
chat_history = []

# Inputs (compared case-insensitively) that end the session.
exit_commands = {"exit", "quit", "q", "f"}

# Print welcome message
print('------------------------------------------------------')
print('Welcome to TalentWhiz AI')
print('------------------------------------------------------')

# Main interaction loop: read a question, ask the chain, show the answer.
while True:
    try:
        # Strip surrounding whitespace so " exit " still exits and a
        # whitespace-only line is treated as empty instead of being sent
        # to the model.
        query = input("User: ").strip()
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C / kernel interrupt / closed stdin at the prompt: leave the
        # same way as an explicit exit command rather than with a traceback.
        print('Exiting')
        sys.exit()

    if query.lower() in exit_commands:
        print('Exiting')
        sys.exit()
    if not query:
        print('Please enter a query.')
        continue

    result = pdf_qa({"question": query, "chat_history": chat_history})
    answer = result["answer"]

    # Clear the previous output so only the latest exchange is displayed
    clear_output()

    # Print the user input
    print("User:", query)

    # Print a line to separate user input and TalentWhiz AI response
    print('------------------------------------------------------')

    # Print the TalentWhiz AI response in green color (ANSI escape codes)
    print("\033[0;92mTalentWhiz:", answer, "\033[0m")

    # Append the query and answer to chat history
    chat_history.append((query, answer))


User: Saravana's total exprence?
------------------------------------------------------
[0;92mTalentWhiz: I'm sorry, but I don't have any information about Saravana's total experience. [0m
User: hi




KeyboardInterrupt: ignored