In [2]:
print("Hello")

Hello


In [3]:
print(5+4)

9


In [4]:
%pip install --quiet --upgrade langchain langchain-community langchain-chroma

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
%pip install -qU langchain-openai

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


# Indexing

In [1]:
import getpass
import os

from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o")

In [11]:
from langchain.schema import (
    SystemMessage,
    HumanMessage,
    AIMessage
)

messages = [
    SystemMessage(content="You are a helpful assistant."),
    HumanMessage(content="Hi AI, how are you today?"),
    AIMessage(content="I'm great thank you. How can I help you?"),
    HumanMessage(content="What can you tell me about W3B JMI?")
]

In [13]:
res = llm(messages)
res.content

"W3B JMI is a blockchain-based platform that aims to integrate Web3 technologies with various business processes. It focuses on providing decentralized solutions for different industries, leveraging blockchain's transparency, security, and efficiency. The platform might offer services such as smart contract development, decentralized finance (DeFi) solutions, and NFT marketplaces, although specific offerings can vary.\n\nIf you have specific aspects of W3B JMI you're curious about, such as its use cases, technology stack, or team, feel free to provide more details!"

## Loading

In [14]:
import os
from langchain.document_loaders import TextLoader

data_dir = "data"

file_paths = [os.path.join(data_dir, file) for file in os.listdir(data_dir) if file.endswith(".txt")]
documents = []
for file_path in file_paths:
    loader = TextLoader(file_path)
    documents.extend(loader.load())

In [18]:
len(documents)

3

## Splitting & Storing

In [2]:
from langchain import hub
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [19]:

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(documents)
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())

In [20]:
len(splits)

11

In [22]:
splits[0].page_content

'W3B - AI & Blockchain Society of JMI\n\nMission Statement: W3B - AI & Blockchain Society of Jamia Millia Islamia (JMI) is dedicated to fostering a community of innovators and learners passionate about Artificial Intelligence and Blockchain technologies. Our mission is to educate, inspire, and empower students to explore these cutting-edge fields through collaborative learning and real-world applications.\n\nAbout Us\nFounded in 2020, W3B is a student-led society that serves as a hub for technology enthusiasts at JMI. We focus on:\n\nEducation: Hosting workshops and seminars to teach the fundamentals and advanced concepts in AI and Blockchain.\nInnovation: Encouraging project development and research among members.\nNetworking: Connecting students with industry professionals and alumni.\nPast Events\nAI in Healthcare Seminar\nDate: March 15, 2021\nDescription: A seminar discussing the impact of AI on the healthcare industry, featuring guest speakers from leading hospitals and tech comp

In [3]:
vectorstore = Chroma(persist_directory="db", embedding_function=OpenAIEmbeddings())

In [4]:
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")



# Retrieval & Generation

In [5]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [6]:
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [12]:
rag_chain.invoke("What is W3B JMI?")

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


'W3B JMI is the AI & Blockchain Society of Jamia Millia Islamia, founded in 2020. It is a student-led society that aims to foster a community passionate about AI and Blockchain technologies through education, innovation, and networking. The society organizes workshops, seminars, and projects to educate and empower students in these fields.'

In [10]:
rag_chain.invoke("What is the finances of the organization?")

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


'The organization, W3B - AI & Blockchain Society of JMI, has a total income of ₹500,000 for the fiscal year 2022-2023, with expenses amounting to ₹360,000, resulting in a surplus of ₹140,000. Income sources include membership fees, sponsorships, and university grants, while expenses cover event costs, equipment, marketing, and administrative costs. The organization maintains a healthy financial status with consistent growth and efficient expense management.'

In [46]:
rag_chain.invoke("How is the marketing like of W3B?")

'The marketing of W3B involves hosting public seminars and workshops, collaborating with tech companies and startups for event sponsorships, and maintaining a strong presence on social media. They focus on education, innovation, and networking to connect students with industry professionals and offer hands-on experiences. Additionally, they provide updates via social media and newsletters to keep the community engaged.'