In [None]:
"""!pip install langchain
!pip install sentence-transformers
!pip install faiss-cpu
!pip install huggingface-hub
!pip install pypdf
!pip install accelerate
!pip install llama-cpp-python
!pip install -U langchain-community
!pip install git+https://github.com/huggingface/transformers"""




In [1]:
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import LlamaCpp
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFDirectoryLoader


In [2]:
# Load every PDF in the local data folder into a list of documents
pdf_dir = r"C:\Users\santo\Downloads\PDF chat(Mistral AI)\data"
loader = PyPDFDirectoryLoader(pdf_dir)
data = loader.load()

In [3]:
# Print the full list of loaded documents (metadata and page text) for inspection
print(data)

[Document(metadata={'producer': 'CloudConvert\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'creator': 'CloudConvert\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'creationdate': '2025-09-03T07:10:31+02:00', 'moddate': '2025-09-03T07:10:31+02:00', 'source': 'C:\\Users\\santo\\Downloads\\PDF chat(Mistral AI)\\data\\Company_Job opportunities.pdf', 'total_pages': 16, 'page': 0, 'page_label': '1'}, page_content='Job Title Job Description Required Skills\nSoftware Engineer Develop software Java, Python\nData Analyst Analyze data SQL, Excel\nNetwork Engineer Maintain networks Cisco, WAN\nCloud Architect Design cloud AWS, Azure\nCybersecurity Analyst Protect data Cybersecurity\nIT Project Manager Manage projects PMP, Agile\nData Scientist Analyze big data R, Python\nDevOps Engineer Streamline devops Docker, Jenkins\nIT Support Analyst Provide IT support Windows, Linux\nUX/UI Designer Design user interfaces UI/UX Design\nDatabase Analyst Manage databases SQL, Oracle, Database Management\nUI Developer 

In [4]:
# 5. Split the extracted data into text chunks

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,     # alternative noted by the author: chunk_size=10000
    chunk_overlap=50,   # alternative noted by the author: chunk_overlap=20
)

text_chunks = text_splitter.split_documents(data)


In [5]:
# Number of chunks produced by the splitter
len(text_chunks)

526

In [6]:
# Display the third chunk (index 2) to inspect its metadata and text
text_chunks[2]

Document(metadata={'producer': 'CloudConvert\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'creator': 'CloudConvert\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'creationdate': '2025-09-03T07:10:31+02:00', 'moddate': '2025-09-03T07:10:31+02:00', 'source': 'C:\\Users\\santo\\Downloads\\PDF chat(Mistral AI)\\data\\Company_Job opportunities.pdf', 'total_pages': 16, 'page': 0, 'page_label': '1'}, page_content='Cloud Solutions Architect Design cloud solutions AWS, Azure, Architecture\nIT Consultant Provide IT consultancy Consulting, IT Strategy\nFront-end Developer Develop web interfaces JavaScript, React\nBusiness Analyst Analyze business needs Business Analysis, Requirements\nIT Helpdesk Support Provide IT support Troubleshooting, Customer Service\nDevSecOps Engineer Secure DevOps pipeline DevSecOps, CI/CD\nData Engineer Build data pipelines ETL, Big Data, SQL')

In [7]:
# 6. Load the sentence-transformers embedding model

from langchain_huggingface import HuggingFaceEmbeddings

embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)


In [8]:
# 7. Embed each text chunk and index the resulting vectors in a FAISS store
vector_store = FAISS.from_documents(
    text_chunks,
    embedding=embeddings,
)

In [9]:
from langchain_community.document_loaders import PyPDFDirectoryLoader

# Raw string (r"...") keeps the backslashes literal, so "\U" in the Windows
# path is not parsed as a unicode escape.
# NOTE(review): this repeats the PDF load done in an earlier cell, and no later
# cell shown here reads `data` again — confirm whether this cell is still needed.
pdf_folder = r"C:\Users\santo\Downloads\PDF chat(Mistral AI)\data"
loader = PyPDFDirectoryLoader(pdf_folder)
data = loader.load()

In [10]:
# Load the local Mistral-7B-Instruct GGUF model through llama.cpp
llm_settings = dict(
    model_path=r"C:\Users\santo\Downloads\PDF chat(Mistral AI)\models\mistral-7b-instruct-v0.2.Q3_K_S.gguf",
    n_ctx=1024,          # context size of 1024 instead of 4096
    temperature=0.75,
    top_p=1,
    streaming=True,
    verbose=True,
)
llm = LlamaCpp(**llm_settings)

llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from C:\Users\santo\Downloads\PDF chat(Mistral AI)\models\mistral-7b-instruct-v0.2.Q3_K_S.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              

In [11]:
# Build a "stuff"-type RetrievalQA chain over the FAISS index; the retriever
# is configured with k=2, i.e. two chunks are fetched per question.
retriever = vector_store.as_retriever(search_kwargs={"k": 2})
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=retriever,
)

In [14]:
# Sample question used to exercise the QA chain.
# NOTE(review): the recorded execution counts show this cell (In [14]) was run
# after the cell that consumes `query` (In [13]) — re-run top to bottom on a
# fresh kernel to confirm the notebook works in order.
query = "List all core engineering companies that recruited students."

In [13]:
# Run the sample question through the QA chain and show the answer text.
# `invoke` replaces the deprecated `Chain.run`; the chain reads its input from
# the "query" key and returns the answer under the "result" key, so the cell
# still evaluates to the answer string.
qa.invoke({"query": query})["result"]

  qa.run(query)
llama_perf_context_print:        load time =  135278.51 ms
llama_perf_context_print: prompt eval time =  135277.74 ms /   475 tokens (  284.80 ms per token,     3.51 tokens per second)
llama_perf_context_print:        eval time =   49303.79 ms /   147 runs   (  335.40 ms per token,     2.98 tokens per second)
llama_perf_context_print:       total time =  184870.40 ms /   622 tokens
llama_perf_context_print:    graphs reused =        185


' The core engineering companies that recruited students, as per the given list, are:\n1. Flipkart\n2. Myntra\n3. Alpha grep\n4. Google\n5. Juspay\n6. Hitachi Vantara\n7. Twocents Capital\n8. CISCO\n9. Browser Stack\n10. Nineti\n11. Apollo\n12. Dolat Capital\n13. Edelweiss\nPlease note that this answer is based on the information provided in the context and may not be complete or accurate for all purposes. It is always a good idea to carefully review the relevant details and seek professional advice if needed.'

In [None]:
# Interactive question loop over the QA chain.
# Type "exit" to stop; empty (or whitespace-only) input is ignored.
while True:
    # Ask the user for a question; strip surrounding whitespace so " exit "
    # and blank lines made of spaces are handled like their trimmed forms.
    user_input = input("input prompt: ").strip()
    if user_input == 'exit':
        print('Exiting')
        # Leave the loop with `break` instead of sys.exit(): sys.exit() raises
        # SystemExit inside the notebook kernel rather than simply ending the cell.
        break
    if not user_input:
        # Nothing typed: prompt again without calling the model.
        continue
    # Send the question to the QA chain (LangChain RAG pipeline). `invoke`
    # replaces the deprecated direct call `qa({...})` and returns the same
    # dict, with the answer under the 'result' key.
    result = qa.invoke({'query': user_input})
    print(f"Answer: {result['result']}")
    

  result = qa({'query': user_input})          # send the question to your QA chain (LangChain RAG pipeline)
Llama.generate: 474 prefix-match hit, remaining 1 prompt tokens to eval
llama_perf_context_print:        load time =  135278.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =   12349.73 ms /    57 runs   (  216.66 ms per token,     4.62 tokens per second)
llama_perf_context_print:       total time =   12389.81 ms /    58 tokens
llama_perf_context_print:    graphs reused =         54


Answer:  The core engineering companies that recruited students, according to the list provided, are: Flipkart, Myntra, Alpha grep, Google, Juspay, Hitachi Vantara, Twocents Capital, CISCO, and Browser Stack.


Llama.generate: 329 prefix-match hit, remaining 249 prompt tokens to eval
llama_perf_context_print:        load time =  135278.51 ms
llama_perf_context_print: prompt eval time =   60024.39 ms /   249 tokens (  241.06 ms per token,     4.15 tokens per second)
llama_perf_context_print:        eval time =  117175.26 ms /   215 runs   (  545.00 ms per token,     1.83 tokens per second)
llama_perf_context_print:       total time =  177905.21 ms /   464 tokens
llama_perf_context_print:    graphs reused =        231


Answer:  To answer this question, you would need to refer to the placement report for the year 2024. Unfortunately, the provided placement report is only up to June 16, 2025. Therefore, it is not possible to determine which companies came for placements in the year 2024 based on the information provided in the placement report.

I hope this helps you understand the situation and the limitations of the available information. If you have any further questions or need clarification on any point, please don't hesitate to ask. I am here to help you learn and grow in your knowledge of data science and machine learning.

Let me know if you have any other question related to the placement report or any other topic related to data science and machine learning. I will be happy to help you with any question you may have.

Best regards,
[Your Name]
Data Scientist & Machine Learning Engineer
D. J. Sanghvi College of Engineering, Mumbai-56


Llama.generate: 45 prefix-match hit, remaining 592 prompt tokens to eval
llama_perf_context_print:        load time =  135278.51 ms
llama_perf_context_print: prompt eval time =  408517.59 ms /   592 tokens (  690.06 ms per token,     1.45 tokens per second)
llama_perf_context_print:        eval time =  191309.20 ms /   176 runs   ( 1086.98 ms per token,     0.92 tokens per second)
llama_perf_context_print:       total time =  600597.74 ms /   768 tokens
llama_perf_context_print:    graphs reused =        224


Answer:  In 2025, according to available data, the world population is projected to reach approximately 8.1 billion people. The global economy is expected to continue its growth trajectory, driven by advances in technology, increasing urbanization, and a growing emphasis on sustainable development. Major trends shaping the future in 2025 include the continued growth of renewable energy sources, such as solar and wind power, which are projected to account for an increasingly large share of global electricity production; the ongoing expansion of digital technologies, such as artificial intelligence (AI), machine learning (ML), and robotics, which are expected to continue transforming industries and creating new opportunities while also posing significant challenges related to privacy, security, and ethical considerations; and the continued evolution of urbanization, with increasing emphasis on smart cities, sustainable infrastructure, and integrated transportation systems.


In [None]:
#https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF?utm_source=chatgpt.com&show_file_info=mistral-7b-instruct-v0.2.Q2_K.gguf