In [1]:
### LLM
from langchain_ollama import ChatOllama

local_llm = "llama3:latest"
llm = ChatOllama(model=local_llm, temperature=0)
llm_json_mode = ChatOllama(model=local_llm, temperature=0, format="json")

In [16]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_community.embeddings import HuggingFaceEmbeddings

# Load multiple URLs to have more documents
urls = [
    "https://iiitu.ac.in",
    "https://iiitu.ac.in/about",  # Add more URLs from the same site
    "https://iiitu.ac.in/academics" ,
    "https://iiitu.ac.in/campuslife",# Add more URLs from the same site
    "https://iiitu.ac.in/placements" ,
    "https://iiitu.ac.in/news" ,
]

# Split documents
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1000, chunk_overlap=200
)

all_chunks = []
for url in urls:
    loader = WebBaseLoader(url)
    docs = loader.load()
    chunks = text_splitter.split_documents(docs)
    all_chunks.extend(chunks)

# Switch to HuggingFaceEmbeddings which works locally
embedding = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2"  # This is a small, efficient embedding model
)

# Add to vectorDB
vectorstore = SKLearnVectorStore.from_documents(
    documents=all_chunks,
    embedding=embedding,
)


# Create retriever
retriever = vectorstore.as_retriever(k=3)

In [18]:
from langchain.prompts import PromptTemplate
prompt = PromptTemplate(
    template="""You are a chatbot that helps users find relevant information. \n
    
    
    Question: {question} \n
    Context: \n\n {documents} \n\n
    
    Give a relevant answer to the question. \n
    If you do not have enough information, you can ask for more information. \n
    Otherwise, you can say that you do not have enough information. \n
    """,
    input_variables=["question", "documents"],
)
basicChain=prompt | llm
question = "what is placement statistics of IIIT Una?"
docs = retriever.invoke(question)
doc_txt = docs[1].page_content
response=basicChain.invoke({"question": question, "documents": doc_txt})

print(response)

content="I'd be happy to help!\n\nAccording to various sources, including the official website of Indian Institute of Information Technology Una (IIIT Una) and other educational portals, here are some placement statistics:\n\n* Average package: ₹8-12 lakhs per annum\n* Highest package: ₹15-20 lakhs per annum\n* Placement rate: around 80-90%\n* Top recruiters: companies like Google, Microsoft, Amazon, IBM, Accenture, etc.\n\nPlease note that these figures are subject to change and might not be up-to-date. I would recommend checking the official website of IIIT Una or contacting their placement cell for more accurate and current information.\n\nWould you like me to provide more details on this topic or explore other related aspects?" response_metadata={'model': 'llama3:latest', 'created_at': '2025-03-15T19:38:27.8915234Z', 'done': True, 'done_reason': 'stop', 'total_duration': 31540568600, 'load_duration': 57316100, 'prompt_eval_count': 92, 'prompt_eval_duration': 655000000, 'eval_count'