https://python.langchain.com/docs/get_started/quickstart

In [27]:
import os
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage
from langchain.retrievers.tavily_search_api import TavilySearchAPIRetriever
from langchain_core.runnables import RunnablePassthrough

In [3]:
# Read both service credentials from the environment in one pass;
# each name is bound to None when its variable is unset
openai_api_key, tavily_api_key = (
    os.getenv(env_name) for env_name in ("OPENAI_API_KEY", "TAVILY_API_KEY")
)

In [4]:
# Report whether each credential was found; None and the empty string both count as "not set"
print("OpenAI API key is set" if openai_api_key else "OpenAI API key is not set")
print("Tavily API key is set" if tavily_api_key else "Tavily API key is not set")

OpenAI API key is set
Tavily API key is set


In [5]:
# Initialize the chat model with its default constructor arguments.
# No API key is passed here, so the client presumably picks up OPENAI_API_KEY
# from the environment on its own — confirm against the langchain_openai docs.
# This `llm` object is reused by every chain built later in the notebook.
llm = ChatOpenAI()

In [6]:
# Define the question that is reused in every experiment below, so the answers
# from the bare model, the prompted chain and the retrieval chain can be compared
query = "What are the key topics covered in each of the three sections of the certificate program?"

In [7]:
# Invoke the LLM directly, with no system prompt and no retrieved context.
# As the last expression of the cell, the result is displayed as a full
# AIMessage object rather than as plain text.
llm.invoke(query)

AIMessage(content="The key topics covered in each of the three sections of the certificate program may vary depending on the specific program. However, generally, the three sections of a certificate program cover:\n\n1. Foundation Section:\n- Introduction to the field or subject area of the certificate program\n- Basic principles, concepts, and theories related to the subject\n- Overview of key terminology and frameworks\n- Understanding the historical context and development of the field\n- Introduction to relevant research methods and approaches\n- Ethical considerations and professional standards in the field\n\n2. Core Section:\n- In-depth study of key concepts, principles, and theories in the subject area\n- Analysis of case studies and real-world examples to illustrate the application of knowledge\n- Exploration of key issues, challenges, and trends in the field\n- Advanced research methods and data analysis techniques\n- Development of critical thinking and problem-solving skill

In [8]:
# Build a two-message prompt: a fixed system persona followed by the user's
# question, which is filled in through the {input} placeholder
instructor_messages = [
    ("system", "You are an instructor for the certificate program at a university in the US."),
    ("user", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(instructor_messages)

In [9]:
# Define a simple chain: the prompt's output is piped into the LLM.
# The chain is invoked with a dict whose "input" key fills the prompt's {input} placeholder.
chain = prompt | llm

In [10]:
# Ask the same question again, this time through the prompted chain.
# The result is still an AIMessage, because nothing parses the model output yet.
chain.invoke({"input": query})

AIMessage(content='In the certificate program, we cover a range of key topics in each of the three sections. Here is an overview of the key topics covered in each section:\n\nSection 1: Foundation Skills\n1. Communication Skills: Verbal and written communication, active listening, effective presentation skills.\n2. Critical Thinking: Problem-solving, decision-making, analytical thinking, and logical reasoning.\n3. Time Management: Planning, prioritizing tasks, setting goals, and managing deadlines.\n4. Teamwork and Collaboration: Building effective teams, resolving conflicts, promoting diversity and inclusion.\n5. Adaptability and Flexibility: Embracing change, managing ambiguity, and being open to learning.\n\nSection 2: Technical Skills\n1. Technology Literacy: Basic computer skills, proficiency in Microsoft Office Suite, and internet navigation.\n2. Data Analysis: Introduction to data analysis tools, data visualization, and interpreting data.\n3. Digital Marketing: Fundamentals of o

In [11]:
# Improve the chain with an output parser so the final result is a plain string
# instead of an AIMessage.
# Note: this rebinds `chain`, replacing the two-step chain defined earlier.
output_parser = StrOutputParser()
chain = prompt | llm | output_parser

In [12]:
# Invoke the chain again with the same question; with the parser appended,
# the displayed result is now a plain Python string
chain.invoke({"input": query})

'In our certificate program, we have divided the curriculum into three sections to provide a comprehensive education. The key topics covered in each section are as follows:\n\nSection 1: Foundation Courses\n1. Introduction to the Field: This course provides an overview of the industry and its various sectors, giving students a solid foundation of knowledge.\n2. Research Methods: Students learn about different research methodologies and techniques used in the field, including data collection, analysis, and interpretation.\n3. Strategic Planning: This course focuses on developing strategic plans for organizations, including goal setting, SWOT analysis, and resource allocation.\n4. Leadership and Management: Students learn about effective leadership styles, team management, and organizational behavior.\n5. Communication Skills: This course helps students develop strong written and verbal communication skills, including presentations and professional correspondence.\n\nSection 2: Specializ

In [13]:
# Define a loader pointed at the certificate program's web page
loader = WebBaseLoader("https://brownschool.wustl.edu/resources-initiatives/advanced-learning-certificate/artificial-intelligence-applications-for-health-data-2/")

# Load the page; as the printed output further down shows, `docs` is a list of Document objects
docs = loader.load()

In [14]:
# Take a look at the web-scraped data.
# NOTE(review): this prints every loaded document in full. The raw page text
# contains long runs of blank lines and navigation menu entries, so the output
# is very large — consider previewing a slice of the page content instead.
print(docs)

[Document(page_content='\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\u200bArtificial Intelligence Applications for Health Data - Brown School at Washington University in St. Louis\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\nAdvancing Social Work, Public Health & Social Policy\n\n\n\n\nMenu\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\nAcademics\n\nMaster of Social Work\nMaster of Public Health\nMaster of Social Policy\nDual Degrees\n3-2 Programs\nPhD in Social Work\nPhD in Public Health Sciences\nAIBDA Certificate\nGl

In [15]:
# Initialize the OpenAI embeddings client with its default constructor arguments;
# it is passed to the FAISS vector store when the index is built
embeddings = OpenAIEmbeddings()

In [16]:
# Initialize the text splitter with its default settings
text_splitter = RecursiveCharacterTextSplitter()

# Split the loaded docs into smaller chunks
documents = text_splitter.split_documents(docs)

# Create the FAISS vector store from the split docs and the embeddings client
vector = FAISS.from_documents(documents, embeddings)

In [17]:
# Create a new prompt that restricts the model to the supplied context.
# It has two placeholders: {context} for the retrieved documents and {input} for the question.
# Note: this rebinds `prompt`, replacing the instructor prompt defined earlier.
prompt = ChatPromptTemplate.from_template("""
    Answer the following question based only on the provided context: {context}
    Question: {input}
    """)

# Create the document chain from the LLM and the context-restricted prompt
document_chain = create_stuff_documents_chain(llm, prompt)

In [18]:
# Initialize the retriever from the vector store
retriever = vector.as_retriever()

# Create the retrieval chain from the retriever and the document chain.
# Judging by the response printed below, it returns a dict that carries the
# original input, the retrieved context documents and the generated answer.
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [19]:
# Invoke the retrieval chain with the same question used earlier.
# The response is kept in `response` because later cells read its 'answer' entry.
# NOTE(review): printing the whole dict also dumps every retrieved Document.
response = retrieval_chain.invoke({"input": query})
print(response)

{'input': 'What are the key topics covered in each of the three sections of the certificate program?', 'context': [Document(page_content='Instructor:\nRuopeng An, PhD, MPP, FACE\nAssociate Professor and Faculty Lead in Public Health Sciences, Brown School\nQuestions?\nPlease contact us at 314.935.7573 or browncertificates@wustl.edu\n\n\nIs this program for me?\nThis program is intended for individuals who have a demonstrated interest in increasing their skills in applying the power of AI techniques to health data for research or industry purposes. You do not have to be an alum of the Brown School to apply for consideration.\nThis is an online program, with scheduled class meetings on Thursday mornings conducted by Zoom and self-paced content, such as readings and assignments.\xa0 Applicants should be prepared to commit 5 hours of total effort per week. Reading assignments come from provided texts, which are included in the program fee.\nThe program presupposes a basic knowledge of stat

In [21]:
# Print only the generated answer, leaving out the input and the retrieved context
print(response['answer'])

The key topics covered in each of the three sections of the certificate program are as follows:

Weeks 1-2: Overview of artificial intelligence, AI and machine learning, coding in Python, data wrangling using NumPy and Pandas, and data visualization using Matplotlib.

Weeks 3-7: Machine learning applications, including classification and regression, model training and validation, support vector machines, decision trees, ensemble methods, dimensionality reduction, unsupervised learning, and auto ML.

Weeks 8-15: Deep learning applications, including neural networks, computer vision, natural language processing, recommender systems, time series forecasting, and creating synthetic data for research and analysis.


The chain we've created so far can only answer single questions. One of the main types of LLM applications that people are building is the chatbot. So how do we turn this chain into one that can answer follow-up questions?

In [22]:
# Make a prompt that includes the chat history: the prior messages are injected
# through the "chat_history" placeholder, followed by the new user input and an
# instruction asking the model to produce a search query for the conversation.
# Note: this rebinds `prompt` again.
prompt = ChatPromptTemplate.from_messages([
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
    ("user", "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation")
])

# Create the history-aware retriever from the LLM, the vector-store retriever
# and the query-generation prompt above
retriever_chain = create_history_aware_retriever(llm, retriever, prompt)

In [23]:
# Create a new chain to continue the conversation with the retrieved documents in mind.
# The system message carries the {context} placeholder, the prior turns are injected
# through "chat_history", and the latest user message fills {input}.
# Note: `prompt`, `document_chain` and `retrieval_chain` are all rebound here,
# replacing the single-question versions built earlier.
prompt = ChatPromptTemplate.from_messages([
    ("system", "Answer the user's questions based on the below context:\n\n{context}"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
])
document_chain = create_stuff_documents_chain(llm, prompt)
# The history-aware retriever chain takes the place of the plain retriever
retrieval_chain = create_retrieval_chain(retriever_chain, document_chain)

In [24]:
# Seed the conversation with the first exchange: the original question and the
# answer the retrieval chain gave for it
chat_history = [
    HumanMessage(content=query),
    AIMessage(content=response["answer"]),
]

# Ask a follow-up that only makes sense in light of that history
follow_up_request = dict(
    chat_history=chat_history,
    input="Tell me more about the topics covered in the final weeks",
)
retrieval_chain.invoke(follow_up_request)

{'chat_history': [HumanMessage(content='What are the key topics covered in each of the three sections of the certificate program?'),
  AIMessage(content='The key topics covered in each of the three sections of the certificate program are as follows:\n\nWeeks 1-2: Overview of artificial intelligence, AI and machine learning, coding in Python, data wrangling using NumPy and Pandas, and data visualization using Matplotlib.\n\nWeeks 3-7: Machine learning applications, including classification and regression, model training and validation, support vector machines, decision trees, ensemble methods, dimensionality reduction, unsupervised learning, and auto ML.\n\nWeeks 8-15: Deep learning applications, including neural networks, computer vision, natural language processing, recommender systems, time series forecasting, and creating synthetic data for research and analysis.')],
 'input': 'Tell me more about the topics covered in the final weeks',
 'context': [Document(page_content='Instructor:

In [26]:
# Switch to a web-search retriever. k=3 presumably caps the number of returned
# results — confirm against the Tavily retriever documentation.
# Note: this rebinds `retriever`, which previously held the FAISS vector-store
# retriever, so re-running the history-aware retriever cell after this one
# would silently use Tavily instead.
# NOTE(review): the first result in the saved output is for St-Louis, Senegal,
# so the query may need a state or country to disambiguate the city.
retriever = TavilySearchAPIRetriever(k=3)
retriever.invoke("how is the weather today in St. Louis?")

[Document(page_content="Weather in St. Louis is {'location': {'name': 'St-Louis', 'region': 'Saint-Louis', 'country': 'Senegal', 'lat': 16.02, 'lon': -16.5, 'tz_id': 'Africa/Dakar', 'localtime_epoch': 1706666105, 'localtime': '2024-01-31 1:55'}, 'current': {'last_updated_epoch': 1706665500, 'last_updated': '2024-01-31 01:45', 'temp_c': 22.0, 'temp_f': 71.6, 'is_day': 0, 'condition': {'text': 'Clear ', 'icon': '//cdn.weatherapi.com/weather/64x64/night/113.png', 'code': 1000}, 'wind_mph': 12.1, 'wind_kph': 19.4, 'wind_degree': 40, 'wind_dir': 'NE', 'pressure_mb': 1015.0, 'pressure_in': 29.96, 'precip_mm': 0.0, 'precip_in': 0.0, 'humidity': 30, 'cloud': 0, 'feelslike_c': 23.3, 'feelslike_f': 73.9, 'vis_km': 10.0, 'vis_miles': 6.0, 'uv': 1.0, 'gust_mph': 25.4, 'gust_kph': 40.8}}", metadata={'title': 'Weather in St. Louis', 'source': 'https://www.weatherapi.com/', 'score': 0.98191, 'images': None}),
 Document(page_content='Currently: 39 °F. Fog. (Weather station: Lambert-St. Louis Internati