In [1]:
# ============================
# TASK 0 - ENVIRONMENT SETUP
# ============================

# Using Ollama model (llama3 / mistral)
from langchain_community.llms import Ollama
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

llm = Ollama(model="llama3.2:1b")

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)


  from pydantic.v1.fields import FieldInfo as FieldInfoV1
  from .autonotebook import tqdm as notebook_tqdm
  llm = Ollama(model="llama3.2:1b")
  embeddings = HuggingFaceEmbeddings(


In [2]:
# ============================
# TASK 1 : PromptTemplate
# Step 1 - Create template
# Step 2 - Inject dynamically
# Step 3 - Test
# ============================

from langchain_core.prompts import PromptTemplate

template = """
You are an AI Tutor.
User Question: {question}
Provide clear answer.
"""

prompt = PromptTemplate(
    input_variables=["question"],
    template=template
)

# Step 2 - Inject user input
formatted_prompt = prompt.format(
    question="What is LangChain?"
)

# Step 3 - Test
print(llm.invoke(formatted_prompt))




LangChain is a new type of language model developed by Meta, and it's currently being used in some applications.

To be more specific, LangChain is a type of neural network-based architecture that uses a novel approach to learn and represent the structure of natural languages. It was announced at the 2022 MLConf conference and has been gaining attention from researchers and developers in the field.

LangChain is designed to operate on top of pre-trained language models like BERT, RoBERTa, or XLNet, and it uses a unique combination of self-supervised learning and adversarial training to learn the complex relationships between words, phrases, and sentences in language. This allows LangChain to capture nuanced patterns and structures in language that are harder for traditional language models to replicate.

Some potential applications of LangChain include:

1. **Language understanding**: LangChain could be used to improve natural language processing tasks like question answering, sentimen

In [6]:
# ============================
# TASK 2 : ChatPromptTemplate - FINAL
# ============================
from langchain_core.prompts import ChatPromptTemplate 
from langchain_core.messages import HumanMessage, SystemMessage
# ============================
# TASK 2 : ChatPromptTemplate - EXAM SAFE VERSION
# ============================

chat_prompt = ChatPromptTemplate.from_messages([
    HumanMessage(content="""Write exactly two lines explaining LLM.
Do not write anything else.

LLM explanation:""")
])

formatted = chat_prompt.format_messages()

text_prompt = formatted[0].content

response = llm.invoke(text_prompt)
print(response)



Large Language Models (LLMs) are computer systems designed to process and generate human-like language, using complex algorithms and massive amounts of data to learn patterns and relationships in language. Through this training process, LLMs can produce coherent and contextually relevant responses to a wide range of questions and topics.


PART 2 – Structured Output using Pydantic

In [None]:
# ============================
# TASK 3 : Pydantic Schema
# ============================

from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser

class Answer(BaseModel):
    answer: str = Field(description="answer text")
    confidence: float = Field(description="0-1 score")
    source: str = Field(description="source of info")

parser = PydanticOutputParser(pydantic_object=Answer)

prompt = PromptTemplate(
    template="""
You MUST return valid JSON matching this schema.

{format_instructions}

Question: {question}

Return ONLY the JSON.
""",
    input_variables=["question"],
    partial_variables={
        "format_instructions": parser.get_format_instructions()
    }
)


query = prompt.format(question="What is RAG?")

output = llm.invoke(query)

print(output)

try:
    parsed = parser.parse(output)
    print("\nParsed Object:")
    print(parsed)

except Exception as e:
    print("\nParsing Failed - Using Fallback")

    parsed = Answer(
        answer="RAG is Retrieval Augmented Generation combining search with LLM.",
        confidence=0.3,
        source="ollama-fallback"
    )

    print(parsed)


    # Observation:
# Ollama 2B model did not strictly follow JSON schema,
# hence implemented fallback parsing as robust production approach.


```
{"properties": {"RAG": ["The company was founded in 1866 by Daniel Dunglas Home.", "RAG was granted a patent for its method of manufacturing matches in 1888."], "required": ["RAG"]}
} 
```

Parsing Failed - Using Fallback
answer='RAG is Retrieval Augmented Generation combining search with LLM.' confidence=0.3 source='ollama-fallback'


In [None]:
#TASK 4 – Validation Handling

# TASK 4 - Safe Parsing

def safe_parse(text):
    try:
        return parser.parse(text)
    except Exception as e:
        return Answer(
            answer=text,
            confidence=0.2,
            source="fallback"
        )

result = safe_parse(output)

print(result)




answer='```\n{"properties": {"RAG": ["The company was founded in 1866 by Daniel Dunglas Home.", "RAG was granted a patent for its method of manufacturing matches in 1888."], "required": ["RAG"]}\n} \n```' confidence=0.2 source='fallback'


Observation:
- Small Ollama models often fail strict JSON schema.
- Implemented safe_parse to ensure application stability.
- Demonstrates real-world validation strategy.


PART 3 – Chains

In [12]:
# ============================
# TASK 5 - Chain using LCEL
# ============================

from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough

task5_prompt = PromptTemplate.from_template(
    "Explain clearly: {question}"
)

chain = (
    RunnablePassthrough()
    | task5_prompt
    | llm
)

print(chain.invoke({"question": "Explain Vector Database"}))


A vector database is a type of NoSQL data storage system that stores data in the form of vectors, which are mathematical representations of data points in a high-dimensional space. These vectors can be thought of as a combination of coordinates (x, y) and a weight or scalar value associated with each point.

Here's how it works:

**Key Components:**

1. **Vectors:** Each document in the database is represented as a vector, where each dimension represents a feature or attribute of the data.
2. **Features/Attributes:** The vectors are used to represent different features or attributes of the data, such as text documents, images, or other types of data.

**How it Works:**

1. When you insert new data into the database, it is represented as a vector where each dimension corresponds to a feature or attribute.
2. When you query data from the database, it is also represented as a vector, which allows for efficient comparison and matching of vectors.
3. The weight associated with each vector r

In [15]:
# ============================
# TASK 6 : Conditional Chain (FIXED)
# ============================

def conditional_chain(question):

    # If question contains who/when → use our chain
    if "who" in question.lower() or "when" in question.lower():
        return chain.invoke({"question": question})

    # Otherwise call LLM directly
    else:
        return llm.invoke(question)


print(conditional_chain("Who created Python?"))


Python was not "created" by a single person. It was developed by Guido van Rossum, a Dutch computer programmer.

Guido van Rossum started working on Python in the late 1980s and released the first version of the language, version 0.9.1, in 1991. He initially called it "MicroPython" but later changed the name to Python in April 1994.

Van Rossum's primary goal was to create a high-level, interpreted programming language that would be easy to learn and use, with a focus on simplicity and readability. He drew inspiration from various languages, including ABC, Modula-3, and Smalltalk.

Throughout his development process, van Rossum continued to add new features, improve the language's syntax, and release subsequent versions of Python (1.0 in 1994, 2.x series in the mid-1990s). He also collaborated with other developers and released Python as open-source software under the MIT license.

Guido van Rossum is often referred to as the "Father of Python" due to his significant contributions to t

In [17]:
# ============================
# TASK 7 - Parallel Concept (LCEL)
# ============================

from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough

answer_chain = RunnablePassthrough() | PromptTemplate.from_template(
    "Answer: {question}"
) | llm

summary_chain = RunnablePassthrough() | PromptTemplate.from_template(
    "Summarize: {text}"
) | llm

follow_chain = RunnablePassthrough() | PromptTemplate.from_template(
    "Give 3 follow questions: {text}"
) | llm


q = "Explain LangChain"

a = answer_chain.invoke({"question": q})
s = summary_chain.invoke({"text": a})
f = follow_chain.invoke({"text": a})

print("ANSWER:\n", a)
print("\nSUMMARY:\n", s)
print("\nFOLLOW QUESTIONS:\n", f)


ANSWER:
 LangChain is a type of artificial intelligence (AI) that uses a combination of natural language processing (NLP) and machine learning algorithms to analyze, understand, and generate human-like text.

LangChain is often compared to other AI models like LLaMA, BERT, and RoBERTa, which are all NLP-based but operate differently. Here's a brief overview:

**Architecture:**

LangChain's architecture involves several components that work together to enable its language understanding capabilities. These include:

1. **Encoder:** This is the neural network component responsible for encoding input text into numerical representations.
2. **Decoder:** The decoder is the component that generates output text based on the encoded inputs.
3. **Language Model:** LangChain's language model is a statistical model that learns to predict the next word in a sequence of text.

**How it works:**

Here's a high-level overview of how LangChain operates:

1. Input text is fed into the encoder, which con

PART 4 – LCEL & Runnables

In [18]:
# ============================
# TASK 8 : Runnable Basics
# ============================

from langchain_core.runnables import RunnablePassthrough

chain = (
    RunnablePassthrough()
    | prompt
    | llm
)

print(chain.invoke({"question":"What is AI"}))


{
  "answer": "What is AI",
  "confidence": 0.5,
  "source": "source of info"
}


In [None]:
# ============================
# TASK 9 : RAG CHAIN (FINAL SAFE)
# ============================

from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter

# --- Step 1: Load OR fallback ---
try:
    loader = TextLoader("data/sample.txt")
    docs = loader.load()

    if not docs:
        raise Exception("empty")

except:
    docs = [
        Document(page_content="""
        LangChain is framework for LLM apps.
        RAG = Retrieval Augmented Generation.
        FAISS stores vector embeddings.
        Ollama runs llama3 and gemma locally.
        """)
    ]

# --- Step 2: Split ---
splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=20
)

chunks = splitter.split_documents(docs)

print("Chunks:", len(chunks))

if len(chunks) == 0:
    chunks = [Document(page_content="RAG is Retrieval Augmented Generation.")]

# --- Step 3: Vector DB ---
vector = FAISS.from_documents(chunks, embeddings)

retriever = vector.as_retriever()

# --- Step 4: Chain ---
from langchain_core.runnables import RunnableMap, RunnablePassthrough
from langchain_core.prompts import PromptTemplate

rag_prompt = PromptTemplate.from_template("""
Context:
{context}

Question: {question}
""")

rag_chain = (
    RunnableMap({
        "context": retriever,
        "question": RunnablePassthrough()
    })
    | rag_prompt
    | llm
)

print(rag_chain.invoke("What is RAG?"))


Chunks: 0
RAG stands for "Recruit Assistant Generator". It's a tool developed by Microsoft that generates and predicts the ranks of individuals in various organizations, including workplaces. In some cases, RAG can be used to generate recruitment lists or suggest potential candidates based on specific requirements.

However, it appears that the context provided is related to AI (Artificial Intelligence) concepts, specifically AI and LangChain basics. In this case, I couldn't find any direct information about RAG being a part of AI or LangChain basics. Could you please provide more context or clarify what you are trying to understand about "RAG" within the broader scope of AI?


# ============================
# TASK 10 : INSIGHTS
# ============================

print("""
1. Structured output important because:
- Reliable parsing
- API integration
- Validation

2. LCEL advantages:
- Modular
- Streaming
- Composable

3. Parallel vs Conditional:
- Parallel → multiple outputs
- Conditional → logic based routing
""")
