In [18]:
import html
import os

from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import HuggingFaceHub
from langchain.chains import create_extraction_chain


In [10]:
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every PDF.

    Args:
        pdf_docs: Iterable of PDF sources. Each item is handed unchanged to
            ``PdfReader`` (for example a file path).

    Returns:
        str: The page texts joined in document order, then page order, with
        no separator inserted between pages. An empty string when
        ``pdf_docs`` is empty.
    """
    page_texts = []
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # A page that yields no text (None or "") contributes nothing,
            # instead of a None breaking the string concatenation.
            page_texts.append(page.extract_text() or "")
    # A single join avoids re-copying the growing string on every page.
    return "".join(page_texts)


def get_text_chunks(text, chunk_size=1000, chunk_overlap=200, separator="\n"):
    """Split ``text`` into chunks with ``CharacterTextSplitter``.

    Args:
        text: The string to split.
        chunk_size: Forwarded to the splitter as ``chunk_size``; lengths are
            measured with ``len``. Defaults to 1000.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.
            Defaults to 200.
        separator: Forwarded to the splitter as ``separator``. Defaults to a
            newline.

    Returns:
        Whatever ``CharacterTextSplitter.split_text`` returns for ``text``.
    """
    text_splitter = CharacterTextSplitter(
        separator=separator,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.split_text(text)


def get_vectorstore(text_chunks, embeddings=None):
    """Build a FAISS vector store from text chunks.

    Args:
        text_chunks: The texts to index; passed to ``FAISS.from_texts`` as
            ``texts``.
        embeddings: Optional embedding model to use. When omitted, a default
            ``OpenAIEmbeddings()`` is created, as before. Pass another model
            (for example
            ``HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")``)
            to switch backends without editing this function.

    Returns:
        The object returned by ``FAISS.from_texts``.
    """
    if embeddings is None:
        # Built lazily so supplying a custom model never constructs the
        # OpenAI client.
        embeddings = OpenAIEmbeddings()
    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)


def get_conversation_chain(vectorstore, llm=None):
    """Create a conversational retrieval chain over ``vectorstore``.

    Args:
        vectorstore: Object exposing ``as_retriever()``; its retriever is
            handed to the chain.
        llm: Optional language model. When omitted, a default
            ``ChatOpenAI()`` is created, as before. Pass another model (for
            example ``HuggingFaceHub(repo_id="google/flan-t5-xxl", ...)``) to
            switch backends without editing this function.

    Returns:
        The chain returned by ``ConversationalRetrievalChain.from_llm``,
        wired to a fresh ``ConversationBufferMemory`` stored under the
        ``'chat_history'`` key.
    """
    if llm is None:
        # Built lazily so supplying a custom model never constructs the
        # OpenAI client.
        llm = ChatOpenAI()

    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
    )


def handle_userinput(user_question):
    """Send a question to the stored conversation chain and render the history.

    The response's ``'chat_history'`` is saved to
    ``st.session_state.chat_history`` and every message in it is written out:
    even positions through ``user_template``, odd positions through
    ``bot_template``, with the message text substituted for ``{{MSG}}``.

    Args:
        user_question: The question, sent to the chain under the
            ``'question'`` key.

    NOTE(review): ``st``, ``user_template`` and ``bot_template`` are not
    defined in the visible part of this notebook; they must be in scope
    before this function is called.
    """
    response = st.session_state.conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    for i, message in enumerate(st.session_state.chat_history):
        template = user_template if i % 2 == 0 else bot_template
        # The result is rendered with unsafe_allow_html=True, so the message
        # text is escaped first; otherwise markup in a question or a model
        # reply would be injected into the page as live HTML.
        safe_content = html.escape(message.content)
        st.write(template.replace("{{MSG}}", safe_content),
                 unsafe_allow_html=True)



In [12]:
# Open the sample PDF and start with an empty string for its extracted text.
pdf_reader = PdfReader('test.pdf')
text = ''

In [13]:
# Rebuild `text` from scratch instead of appending to it, so re-running this
# cell cannot duplicate the document's content. Pages that yield no text
# contribute an empty string.
text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

In [14]:
# Show the extracted text as the cell's output.
# NOTE(review): the rendered output contains personal contact details
# (name, phone number, e-mail); clear or redact it before sharing the notebook.
text

' \nSUSAN   KHATIWADA  \n\uf0b7 Shankharapur 5 , Kathmandu, N epal \n\uf0b7 +977 9849140941  \n\uf0b7 susankhatiwada23@gmail.com  \n \n \n \n\uf076 PROFESSIONAL SUMMARY   \nDedicated professional with a teaching background, currently working as a counselor and \ndocumentation officer. Known for creating inclusive learning environments and providing \nvaluable guidance. Skillful at careful record -keeping and efficient administrative tasks. Ready to \nbring a continuous balance of educational and organizational skil ls to contribute to team.  \nOrganized simultaneous office functions and direct administrative personnel to meet needs of \nprofessionals. Performance -oriented and driven with in -depth understanding of budgets, payroll \nand office organization needs. Skillfully coordinate resources and administrative support to keep \noperations smooth and boost team productivity.  Excellent team  player with positive attitude. \nPossess good time ma nagement & organization skills. Career

In [15]:
# Fields to extract; this dict is passed to create_extraction_chain below.
schema = {
    "properties": {
        "experience": {"type": "string"},
        "total-work-experience": {"type": "integer"},
        "tools/skills": {"type": "string"},
    },
}

In [16]:
# Load variables from a local .env file (if one exists) and take the API key
# from the environment, so the secret is never written into the notebook.
load_dotenv()
openai_api_key = os.environ.get("OPENAI_API_KEY", "")

# Chat model used by the extraction chain.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    max_tokens=2000,
    openai_api_key=openai_api_key,
)

In [19]:
# Build the extraction chain from the schema and the chat model defined above.
chain = create_extraction_chain(schema, llm)

In [20]:
# Run the extraction chain on the extracted PDF text; as the last expression
# in the cell, its result is displayed as the cell output.
chain.run(text)

[{'experience': 'Counselor and Documentation Officer',
  'total-work-experience': 1,
  'tools/skills': 'Care Planning, Office Administration, Counseling, Communication Skills, MS Word, MS Excel, PowerPoint, Bookkeeping, Database administration, Documentation and control'},
 {'experience': 'Lower Secondary School Teacher',
  'total-work-experience': 4,
  'tools/skills': 'Lesson Planning, Small Group Instruction, Assessments, Positive Reinforcement Strategies, Behavior Management Techniques'}]