<a href="https://colab.research.google.com/github/AdityaMali918/Python/blob/main/GEN_AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 1. RunnableConnector self developed  

In [None]:
from abc import abstractmethod,ABC

In [None]:
class Runnable(ABC):
  @abstractmethod
  def invoke(input_data):
    pass

In [None]:
import random

class NakliLLM(Runnable):
  def __init__(self):
    print("LLM created")

  def invoke(self,prompt):
    response_list = [
        'Delhi is the capital of India',
        'IPL is a cricket league',
        'AI stands for Artificial Intelligence'
    ]

    return {"response": random.choice(response_list)}

In [None]:
class NakliPromptTemplate(Runnable):
  def __init__(self,template,input_variables):
    self.template = template
    self.input_variables = input_variables

  def invoke(self,input_dict):
    return self.template.format(**input_dict)

  def format(self,input_dict):
    return self.template.format(**input_dict)

In [None]:
class NakliStrOutputParser(Runnable):

  def __init__(self):
    pass

  def invoke(self,input_data):
    return  input_data['response']

In [None]:
class RunnableConnector(Runnable):
  def __init__(self,runnable_list):
    self.runnable_list = runnable_list

  def invoke(self,input_data):
    for runnable in self.runnable_list:
      input_data = runnable.invoke(input_data)
    return input_data


In [None]:
template = NakliPromptTemplate(
    template='Write a {length} poem about {topic}',
    input_variables=['length', 'topic']
)

In [None]:
llm = NakliLLM()

LLM created


In [None]:
parser = NakliStrOutputParser()

In [None]:
chain = RunnableConnector([template,llm,parser])

In [None]:
chain.invoke({'length':'long', 'topic':'india'})

'IPL is a cricket league'

# ***Next Example***

In [None]:
template1 = NakliPromptTemplate(
    template='Write a joke about {topic}',
    input_variables=['topic']
)

In [None]:
template2 = NakliPromptTemplate(
    template='Explain the following joke {response}',
    input_variables=['response']
)

In [None]:
chain1 = RunnableConnector([template1, llm])

In [None]:
chain2 = RunnableConnector([template2, llm, parser])

In [None]:
final_chain = RunnableConnector([chain1, chain2])

In [None]:
final_chain.invoke({'topic':'cricket'})

'IPL is a cricket league'

# **Runnable**

# **1. RunnableSequennce**

In [None]:
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
from langchain_core.prompts import PromptTemplate
from langchain.schema.runnable import RunnableSequence
from langchain_core.output_parsers import StrOutputParser
import os

os.environ['HF_HOME'] = 'D:/huggingface_cache'

llm = HuggingFacePipeline.from_model_id(
    model_id='TinyLlama/TinyLlama-1.1B-Chat-v1.0',
    task='text-generation',
    device=0,
    pipeline_kwargs=dict(
        temperature=0.5,
        max_new_tokens=100,
    )
)

model = ChatHuggingFace(llm=llm)

prompt1 = PromptTemplate(
      template = 'Write a joke about {topic}',
      input_variables=["topic"]
)

parser = StrOutputParser()

chain = RunnableSequence(prompt1,model,parser)

result = chain.invoke({"topic":"traffic"})
print(result)


Device set to use cuda:0


<|user|>
Write a joke about traffic</s>
<|assistant|>
Traffic Cop: "Hey, watch it there! You're going too fast!"

Driver 1: "Uh, sorry about that. I was on my way to work."

Driver 2: "Oh, you're a traffic cop? That's awesome!"

Driver 1: "Yeah, it's my job to keep the road safe."

Driver 2: "That's good to know. I thought


# **2. RunnableParallel**

In [None]:
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.schema.runnable import RunnableSequence, RunnableParallel
import os

os.environ["HF_HOME"] = 'D:/huggingface_cache'

llm = HuggingFacePipeline.from_model_id(
    model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    task = "text-generation",
    device = 0,
    pipeline_kwargs=dict(
        temperature = 0.7,
        max_new_tokens = 150
    )
)

model = ChatHuggingFace(llm = llm)

parser = StrOutputParser()

prompt1 = PromptTemplate(
    template="Generate Linkedin Post on this {topic}",
    input_variables=["topic"]
)

prompt2 = PromptTemplate(
    template="Generate Twitter tweet on this {topic}",
    input_variables=["topic"]
)

parallel_chain = RunnableParallel({
    "linkedin":RunnableSequence(prompt1,model,parser),
    "tweet":RunnableSequence(prompt2,model,parser)
})
result = parallel_chain.invoke({"topic":"AI"})
print(result["linkedin"])
print(result["tweet"])

Device set to use cuda:0


<|user|>
Generate Linkedin Post on this AI</s>
<|assistant|>
Looking for a new career path in the exciting world of AI? Whether you're an experienced professional or just starting out, this is the perfect opportunity to invest in your future and up-skill yourself with the latest advancements in Artificial Intelligence (AI).

At [Company Name], we believe that AI is the future of business and we are proud to offer our employees the opportunity to gain hands-on experience and gain valuable industry knowledge. Our AI training program allows our employees to learn and develop their skills with the latest tools and technologies, while also contributing to our company's growth and success.

Here are just a few of the benefits that you can expect from this program
<|user|>
Generate Twitter tweet on this AI</s>
<|assistant|>
"With the advancements in AI, our world is becoming smarter and more efficient. From self-driving cars to chatbot assistants, AI is transforming how we see and interact wi

# **3. RunnablePassthrough**

### It just returns what ever input you give

In [None]:
from langchain_huggingface import ChatHuggingFace,HuggingFacePipeline
from langchain_core.output_parsers import StrOutputParser
from langchain.schema.runnable import RunnableSequence,RunnableParallel,RunnablePassthrough
from langchain_core.prompts import PromptTemplate
import os

os.environ["HF_HOME"] = 'D:/huggingface_cache'

llm = HuggingFacePipeline.from_model_id(
    model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    task="text-generation",
    device = 0,
    pipeline_kwargs=dict(
        max_new_tokens=150,
        temperature = 0.8
    )
)

model = ChatHuggingFace(llm = llm)

prompt1 = PromptTemplate(
    template='Write a one liner joke about {topic}',
    input_variables=['topic']
)

parser = StrOutputParser()

prompt2 = PromptTemplate(
    template='Explain the following joke - {text}',
    input_variables=['text']
)

get_joke = RunnableSequence(prompt1,model,parser)

parallel_joke = RunnableParallel({
    "joke":RunnablePassthrough(),
    "explanation":RunnableSequence(prompt2,model,parser)
})

chain = RunnableSequence(get_joke,parallel_joke)

print(chain.invoke({'topic':'college'}))

Device set to use cuda:0


{'joke': '<|user|>\nWrite a one liner joke about college</s>\n<|assistant|>\n"I went to college and learned a thing or two about being a jerk."', 'explanation': '<|user|>\nExplain the following joke - <|user|>\nWrite a one liner joke about college</s>\n<|assistant|>\n"I went to college and learned a thing or two about being a jerk."</s>\n<|assistant|>\n"She was a sophomore, and she took it upon herself to study the dictionary. The professor was so impressed that he granted her an A."'}


In [None]:
from langchain_huggingface import ChatHuggingFace,HuggingFacePipeline
from langchain_core.output_parsers import StrOutputParser
from langchain.schema.runnable import RunnableSequence,RunnableParallel,RunnablePassthrough
from langchain_core.prompts import PromptTemplate
import os

os.environ["HF_HOME"] = 'D:/huggingface_cache'

llm = HuggingFacePipeline.from_model_id(
    model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    task="text-generation",
    device =0,
    pipeline_kwargs=dict(
        max_new_tokens=150,
        temperature = 0.8
    )
)

model = ChatHuggingFace(llm = llm)

print(model.invoke("Explain me this joke.\nI went to the aquarium this weekend, but I didn’t stay long. There’s something fishy about that place."))

Device set to use cuda:0


content="<|user|>\nExplain me this joke.\nI went to the aquarium this weekend, but I didn’t stay long. There’s something fishy about that place.</s>\n<|assistant|>\nSure, here's a retelling of the joke:\n\nYou hear some funny fish noises coming from the aquarium. You decide to visit and spend a few hours exploring the mysterious and fascinating world of fish. As you walk around, you see some amazing creations bobbing around in the tank. The water is crystal clear, and you can see fish with teeth, scales, and other intricate features.\n\nYou spend some time admiring the colorful fish, and you even catch a few yourself by playing around in the water. But then, you notice a strange change. The fish seem to be acting strange, and you're not sure what it is. They" additional_kwargs={} response_metadata={} id='run--fd53fc40-bec2-4440-ae7c-36a4db636f80-0'


#4. **RunnableLambda**

In [None]:
from langchain.schema.runnable import RunnableSequence,RunnableParallel,RunnablePassthrough,RunnableLambda

def word_counter(text):
  return len(text.split())

runnableLambdaText  = RunnableLambda(word_counter)

runnableLambdaText.invoke("<|user|>\nExplain me this joke.\nI went to the aquarium this weekend, but I didn’t stay long. There’s something fishy about that place.</s>\n<|assistant|>\nSure, here's a retelling of the joke:\n\nYou hear some funny fish noises coming from the aquarium. You decide to visit and spend a few hours exploring the mysterious and fascinating world of fish. As you walk around, you see some amazing creations bobbing around in the tank. The water is crystal clear, and you can see fish with teeth, scales, and other intricate features.\n\nYou spend some time admiring the colorful fish, and you even catch a few yourself by playing around in the water. But then, you notice a strange change. The fish seem to be acting strange, and you're not sure what it is. They")

132

In [None]:
from langchain_huggingface import ChatHuggingFace,HuggingFacePipeline
from langchain_core.prompts import PromptTemplate
from langchain.schema.runnable import RunnableLambda
import os

os.environ['HF_HOME']="D:/huggingface_cache"

llm = HuggingFacePipeline.from_model_id(
    model_id='TinyLlama/TinyLlama-1.1B-Chat-v1.0',
    task='text-generation',
    device=0,
    pipeline_kwargs=dict(
        temperature=0.5,
        max_new_tokens=100,
    )
)

model = ChatHuggingFace(llm=llm)

def word_count(text):
    return len(text.split())

prompt = PromptTemplate(
    template='Write a joke about {topic}',
    input_variables=['topic']
)


joke_gen_chain = RunnableSequence(prompt, model, parser)

## 1st method

# parallel_chain = RunnableParallel({
#     "joke": RunnablePassthrough(),
#     "word_count":RunnableLambda(word_count)
# })

## 2nd method

parallel_chain = RunnableParallel({
    "joke": RunnablePassthrough(),
    "word_count":RunnableLambda(lambda x : len(x.split()))
})

final_chain = RunnableSequence(joke_gen_chain, parallel_chain)

result = final_chain.invoke({'topic':'AI'})

final_result = """{} \n word count - {}""".format(result['joke'], result['word_count'])

print(final_result)

Device set to use cuda:0


<|user|>
Write a joke about AI</s>
<|assistant|>
Q: Can you summarize the joke about AI?

A: A: Sure!

AI: (laughs) "Hey, can you translate this joke into AI language?"

AI2: (smirks) "Sure, I can do that!"

AI: (voice-over) "It's always funny how humans get confused by AI's jokes."

AI: (voice-over) 
 word count - 50


# **5. Runnable Branch**

In [None]:
from langchain_huggingface import ChatHuggingFace,HuggingFacePipeline
from langchain_core.prompts import PromptTemplate
from langchain.schema.runnable import RunnableLambda,RunnableSequence,RunnableParallel,RunnablePassthrough,RunnableBranch
from langchain_core.output_parsers import StrOutputParser
import os

os.environ['HF_HOME']="D:/huggingface_cache"

llm = HuggingFacePipeline.from_model_id(
    model_id='TinyLlama/TinyLlama-1.1B-Chat-v1.0',
    task='text-generation',
    device=0,
    pipeline_kwargs=dict(
        temperature=0.5,
        max_new_tokens=350,
    )
)

model = ChatHuggingFace(llm=llm)

prompt1 = PromptTemplate(
    template='Write a detailed report on {topic}',
    input_variables=['topic']
)

prompt2 = PromptTemplate(
    template='Summarize the following text \n {text}',
    input_variables=['text']
)

parser = StrOutputParser()

report_gen_chain = prompt1 | model | parser

branch_chain = RunnableBranch(
    (lambda x : len(x.split())>300,prompt2 | model | parser),
    RunnablePassthrough()
)

def word_counter(text):
  return len(text.split())

runnableLambdaText  = RunnableLambda(word_counter)

final_chain = RunnableSequence(report_gen_chain, branch_chain)

print(final_chain.invoke({'topic':'Russia vs Ukraine'}))

Device set to use cuda:0


<|user|>
Write a detailed report on Russia vs Ukraine</s>
<|assistant|>
Introduction

The ongoing conflict between Ukraine and Russia has been a significant geopolitical and geoeconomic event that has had far-reaching implications for both countries and the global community. This report explores the historical, political, economic, and military dimensions of the conflict, as well as its impact on the international community and the global economy.

Historical Context

The conflict between Ukraine and Russia began in 2014 when pro-Russian separatists in eastern Ukraine declared an independent state called "Donetsk People's Republic" and "Luhansk People's Republic." The following year, Russia annexed Crimea, a peninsula on the Black Sea, from Ukraine. This move was seen as a violation of Ukraine's sovereignty and territorial integrity, and it triggered a series of events that led to the conflict.

The conflict has been characterized by a series of escalating events, including a military 

# **Document Loader**

## Text Loader

In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_huggingface import HuggingFacePipeline,ChatHuggingFace
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
import os

os.environ["HF_HOME"] = 'D:/huggingface_cache'

llm = HuggingFacePipeline.from_model_id(
    model_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    task = "text-generation",
    device= 0 ,
    pipeline_kwargs=dict(
        max_new_tokens=500,
        temperature = 0.7
    )
)

model = ChatHuggingFace(llm = llm)

prompt = PromptTemplate(
    template="Write a summary for the following poem - \n {poem}",
    input_variables= ['poem']
)

parser = StrOutputParser()

loader = TextLoader("cricket.txt",encoding="utf-8")

docs = loader.load()


chain = prompt | model | parser

result = chain.invoke({'poem':docs[0].page_content})
print(result)


# print(docs)

# print(type(docs))
# print(docs[0].page_content)

# print(docs[0].metadata)

Device set to use cuda:0
Token indices sequence length is longer than the specified maximum sequence length for this model (2482 > 2048). Running this sequence through the model will result in indexing errors


<|user|>
Write a summary for the following poem - 
 Beneath the sun or floodlight's gleam,

Cricket lives like a waking dream.

A field of green, a willowed sound,

Where legends rise and tales are found.

From dusty lanes where barefoot boys,

Chase every run with shrieks of joy,

To packed arenas roaring loud,

The game unites a global crowd.

A coin is tossed, the captains stare,

As tension thickens in the air.

Bat or bowl? A choice so bold,

A story new begins, retold.

The openers walk, calm yet brave,

Each stride a wave upon the wave.

They face the ball with narrowed eyes,

As silence grips the watching skies.

The bowler runs, a rhythmic beat,

Like thunder galloping on feet.

A leather flash, a wooden crack—

The ball takes flight, then tumbles back.

A flick through square, a drive through mid,

A lofted shot the fielder missed.

A single, double, sprint for three,

The crowd erupts in ecstasy.

But not for long—the trap is set,

The spinner loops, the pitch is wet.

A sud

# **PyPDFLoader**

In [None]:
from langchain_community.document_loaders import PyPDFLoader


loader = PyPDFLoader("/content/dl-curriculum.pdf")

docs = loader.load()

print(docs)

[Document(metadata={'producer': 'Skia/PDF m131 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '', 'title': 'Deep Learning Curriculum', 'source': '/content/dl-curriculum.pdf', 'total_pages': 23, 'page': 0, 'page_label': '1'}, page_content='CampusXDeepLearningCurriculum\nA.ArtificialNeuralNetworkandhowtoimprovethem\n1.BiologicalInspiration\n● Understandingtheneuronstructure● Synapsesandsignaltransmission● Howbiologicalconceptstranslatetoartificialneurons\n2.HistoryofNeuralNetworks\n● Earlymodels(Perceptron)● BackpropagationandMLPs● The"AIWinter"andresurgenceofneuralnetworks● Emergenceofdeeplearning\n3.PerceptronandMultilayerPerceptrons(MLP)\n● Single-layerperceptronlimitations● XORproblemandtheneedforhiddenlayers● MLParchitecture\n4. LayersandTheirFunctions\n● InputLayer○ Acceptinginputdata● HiddenLayers○ Featureextraction● OutputLayer○ Producingfinalpredictions\n5.ActivationFunctions'), Document(metadata={'producer': 'Skia/PDF m131 Google Docs Renderer', 'creator': 'PyPDF', 

In [None]:
!pip install pypdf

Collecting pypdf
  Downloading pypdf-5.8.0-py3-none-any.whl.metadata (7.1 kB)
Downloading pypdf-5.8.0-py3-none-any.whl (309 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/309.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.7/309.7 kB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-5.8.0


# **Directory Loader**

In [None]:
from langchain_community.document_loaders import  PyPDFLoader,DirectoryLoader

loader = DirectoryLoader('Document-Loaders\Books',glob='*.pdf',loader_cls=PyPDFLoader)

docs = loader.lazy_load()

for document in docs:
    print(document.metadata)

print(len(docs))

# **Web Based Loader**

In [None]:
!pip install langchain_community

Collecting langchain_community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain_community)
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 k

In [None]:
!pip install langchain_huggingface

Collecting langchain_huggingface
  Downloading langchain_huggingface-0.3.0-py3-none-any.whl.metadata (996 bytes)
Downloading langchain_huggingface-0.3.0-py3-none-any.whl (27 kB)
Installing collected packages: langchain_huggingface
Successfully installed langchain_huggingface-0.3.0


In [None]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_huggingface import HuggingFacePipeline,ChatHuggingFace
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
import os

os.environ["HF_HOME"] = 'D:/huggingface_cache'

llm = HuggingFacePipeline.from_model_id(
    model_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    task = "text-generation",
    device= 0 ,
    pipeline_kwargs=dict(
        max_new_tokens=500,
        temperature = 0.7
    )
)

model = ChatHuggingFace(llm = llm)

prompt = PromptTemplate(
    template="Write a summary for the following poem - \n {poem}",
    input_variables= ['poem']
)

parser = StrOutputParser()

url = 'https://www.amazon.in/ref=PS5BAU25QCPS5fortnitedisc/dp/B08GZ6QNTC/?_encoding=UTF8&pd_rd_w=I3SNY&content-id=amzn1.sym.3b2d0011-a8ec-4315-a031-cca3c26cfcd6&pf_rd_p=3b2d0011-a8ec-4315-a031-cca3c26cfcd6&pf_rd_r=0JM9Z321WH17RVCB54QJ&pd_rd_wg=t2aNx&pd_rd_r=ff94a82f-a4df-4de0-b6aa-7f11cd47de84&ref_=pd_hp_d_atf_unk'
loader = WebBaseLoader(url)


prompt = PromptTemplate(
    template='Answer the following question \n {question} from the following text - \n {text}',
    input_variables=['question','text']
)

docs = loader.load()

parser = StrOutputParser()

chain = prompt | model | parser

print(chain.invoke({'question':'What is the prodcut that we are talking about?', 'text':docs[0].page_content}))


print(docs[0].page_content)































































































Sony DualSense Wireless Controller White (PlayStation 5) : Amazon.in: Video Games










































  Shortcuts menu


Skip to



        Main content
      



        About this item
      



        About this item
      



        About this item
      



        Buying options
      



        Compare with similar items
      



        Videos
      



        Reviews
      




      Keyboard shortcuts
  




Search

alt
+
/







Cart

shift
+
alt
+
C







Home

shift
+
alt
+
H







Orders

shift
+
alt
+
O







Add to cart

shift
+
alt
+
K







Open/close shortcuts menu

shift
+
alt
+
Z










To move between items, use your keyboard's up or down arrows.















.in









                   Delivering to Mumbai 400001
                

                   Update location
                




















Video Games


Select the depa

# **CSV Loader**

In [None]:
from langchain_community.document_loaders import CSVLoader

loader = CSVLoader(file_path='Social_Network_Ads.csv')

docs = loader.load()

print(len(docs))
print(docs[1])

400
page_content='User ID: 15810944
Gender: Male
Age: 35
EstimatedSalary: 20000
Purchased: 0' metadata={'source': 'Social_Network_Ads.csv', 'row': 1}


# **Text Splitter**

### **Length based splitting**

In [None]:
from langchain.text_splitter import CharacterTextSplitter

docs = loader.load()
splitter = CharacterTextSplitter(
    chunk_size = 100,
    chunk_overlap=0,
    separator=" "
)

text = """
You vexed shy mirth now noise. Talked him people valley add use her depend letter. Allowance too applauded now way something recommend. Mrs age men and trees jokes fancy. Gay pretended engrossed eagerness continued ten. Admitting day him contained unfeeling attention mrs out.

Ye on properly handsome returned throwing am no whatever. In without wishing he of picture no exposed talking minutes. Curiosity continual belonging offending so explained it exquisite. Do remember to followed yourself material mr recurred carriage. High drew west we no or at john. About or given on witty event. Or sociable up material bachelor bringing landlord confined. Busy so many in hung easy find well up. So of exquisite my an explained remainder. Dashwood denoting securing be on perceive my laughing so.

As am hastily invited settled at limited civilly fortune me. Really spring in extent an by. Judge but built gay party world. Of so am he remember although required. Bachelor unpacked be advanced at. Confined in declared marianne is vicinity.

May musical arrival beloved luckily adapted him. Shyness mention married son she his started now. Rose if as past near were. To graceful he elegance oh moderate attended entrance pleasure. Vulgar saw fat sudden edward way played either. Thoughts smallest at or peculiar relation breeding produced an. At depart spirit on stairs. She the either are wisdom praise things she before. Be mother itself vanity favour do me of. Begin sex was power joy after had walls miles.
"""
result = splitter.split_text(text)
print(result)

['You vexed shy mirth now noise. Talked him people valley add use her depend letter. Allowance too', 'applauded now way something recommend. Mrs age men and trees jokes fancy. Gay pretended engrossed', 'eagerness continued ten. Admitting day him contained unfeeling attention mrs out.\n\nYe on properly', 'handsome returned throwing am no whatever. In without wishing he of picture no exposed talking', 'minutes. Curiosity continual belonging offending so explained it exquisite. Do remember to followed', 'yourself material mr recurred carriage. High drew west we no or at john. About or given on witty', 'event. Or sociable up material bachelor bringing landlord confined. Busy so many in hung easy find', 'well up. So of exquisite my an explained remainder. Dashwood denoting securing be on perceive my', 'laughing so.\n\nAs am hastily invited settled at limited civilly fortune me. Really spring in extent', 'an by. Judge but built gay party world. Of so am he remember although required. Bachelo

In [None]:
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("/content/dl-curriculum.pdf")

splitter = CharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=0
)

docs = loader.lazy_load()
result = splitter.split_documents(docs)
print(result[3].page_content)

○ Decidingdepthandwidth● Techniques:○ Gridsearch○ RandomSearch○ Bayesianoptimization
13.VanishingandExplodingGradients
● Problemsindeepnetworks● Solutions:○ Properweightinitialization○ UseofReLUactivationfunctions
14.WeightInitializationStrategies
● Xavier/GlorotInitialization● HeInitialization
15.BatchNormalization
● Normalizinginputsofeachlayer● Acceleratingtraining● Reducingdependenceoninitialization
B.ConvolutionNeuralNetworks
1.ChallengeswithMLPsforImageData
● Highdimensionality● Lackofspatialinvariance
2.AdvantagesofCNNs
● Parametersharing● Localconnectivity


In [None]:
!pip install pypdf

Collecting pypdf
  Downloading pypdf-5.8.0-py3-none-any.whl.metadata (7.1 kB)
Downloading pypdf-5.8.0-py3-none-any.whl (309 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/309.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.7/309.7 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-5.8.0


# **RecusiveCharacterTextSplitter  : Text  Structure**

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text = """
Space exploration has led to incredible scientific discoveries. From landing on the Moon to exploring Mars, humanity continues to push the boundaries of what’s possible beyond our planet.

These missions have not only expanded our knowledge of the universe but have also contributed to advancements in technology here on Earth. Satellite communications, GPS, and even certain medical imaging techniques trace their roots back to innovations driven by space programs.
"""

splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=0
)

result = splitter.split_text(text)

print(result)
print("Len: ",len(result))

['Space exploration has led to incredible scientific discoveries. From landing on the Moon to exploring Mars, humanity continues to push the boundaries of what’s possible beyond our planet.', 'These missions have not only expanded our knowledge of the universe but have also contributed to advancements in technology here on Earth. Satellite communications, GPS, and even certain medical imaging techniques trace their roots back to innovations driven by space programs.']
Len:  2


# **RecusiveCharacterTextSplitter  : Text  Structure - Language**

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter,Language

text = """
class Student:
    def __init__(self, name, age, grade):
        self.name = name
        self.age = age
        self.grade = grade  # Grade is a float (like 8.5 or 9.2)

    def get_details(self):
        return self.name"

    def is_passing(self):
        return self.grade >= 6.0


# Example usage
student1 = Student("Aarav", 20, 8.2)
print(student1.get_details())

if student1.is_passing():
    print("The student is passing.")
else:
    print("The student is not passing.")

"""

splitter = RecursiveCharacterTextSplitter.from_language(
    chunk_size=300,
    chunk_overlap=0,
    language = Language.PYTHON
)

result = splitter.split_text(text)

print(result)
print("Len: ",len(result))
print(result[1])

['class Student:\n    def __init__(self, name, age, grade):\n        self.name = name\n        self.age = age\n        self.grade = grade  # Grade is a float (like 8.5 or 9.2)\n\n    def get_details(self):\n        return self.name"\n\n    def is_passing(self):\n        return self.grade >= 6.0', '# Example usage\nstudent1 = Student("Aarav", 20, 8.2)\nprint(student1.get_details())\n\nif student1.is_passing():\n    print("The student is passing.")\nelse:\n    print("The student is not passing.")']
Len:  2
# Example usage
student1 = Student("Aarav", 20, 8.2)
print(student1.get_details())

if student1.is_passing():
    print("The student is passing.")
else:
    print("The student is not passing.")


In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter,Language

text = """
# Project Name: Smart Student Tracker

A simple Python-based project to manage and track student data, including their grades, age, and academic status.


## Features

- Add new students with relevant info
- View student details
- Check if a student is passing
- Easily extendable class-based design


## 🛠 Tech Stack

- Python 3.10+
- No external dependencies


## Getting Started

1. Clone the repo
   ```bash
   git clone https://github.com/your-username/student-tracker.git

"""

# Initialize the splitter
splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.MARKDOWN,
    chunk_size=200,
    chunk_overlap=0,
)

# Perform the split
chunks = splitter.split_text(text)

print(len(chunks))
print(chunks[0])

3
# Project Name: Smart Student Tracker

A simple Python-based project to manage and track student data, including their grades, age, and academic status.


# **Semantic text splitter**

In [None]:

from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai.embeddings import OpenAIEmbeddings
from dotenv import load_dotenv

load_dotenv()

text_splitter = SemanticChunker(
    OpenAIEmbeddings(), breakpoint_threshold_type="standard_deviation",
    breakpoint_threshold_amount=3
)

sample = """
Farmers were working hard in the fields, preparing the soil and planting seeds for the next season. The sun was bright, and the air smelled of earth and fresh grass. The Indian Premier League (IPL) is the biggest cricket league in the world. People all over the world watch the matches and cheer for their favourite teams.


Terrorism is a big danger to peace and safety. It causes harm to people and creates fear in cities and villages. When such attacks happen, they leave behind pain and sadness. To fight terrorism, we need strong laws, alert security forces, and support from people who care about peace and safety.
"""

docs = text_splitter.create_documents([sample])
print(len(docs))
print(docs)




# **Vector Storage**

In [None]:
!pip install langchain chromadb openai tiktoken pypdf langchain_openai langchain-community

Collecting chromadb
  Downloading chromadb-1.0.15-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)
Collecting pypdf
  Downloading pypdf-5.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain_openai
  Downloading langchain_openai-0.3.28-py3-none-any.whl.metadata (2.3 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.22.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.6 kB)
Collecting opentelemetry-api>=1.2.0 (from chromadb)
  Downloading opentelemetry_api-1.35.0-py3-none-any.whl.metadata (1.5 kB)
Colle

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

In [None]:
from langchain.schema import Document

doc1 = Document(
        page_content="Virat Kohli is one of the most successful and consistent batsmen in IPL history. Known for his aggressive batting style and fitness, he has led the Royal Challengers Bangalore in multiple seasons.",
        metadata={"team": "Royal Challengers Bangalore"}
    )
doc2 = Document(
        page_content="Rohit Sharma is the most successful captain in IPL history, leading Mumbai Indians to five titles. He's known for his calm demeanor and ability to play big innings under pressure.",
        metadata={"team": "Mumbai Indians"}
    )
doc3 = Document(
        page_content="MS Dhoni, famously known as Captain Cool, has led Chennai Super Kings to multiple IPL titles. His finishing skills, wicketkeeping, and leadership are legendary.",
        metadata={"team": "Chennai Super Kings"}
    )
doc4 = Document(
        page_content="Jasprit Bumrah is considered one of the best fast bowlers in T20 cricket. Playing for Mumbai Indians, he is known for his yorkers and death-over expertise.",
        metadata={"team": "Mumbai Indians"}
    )
doc5 = Document(
        page_content="Ravindra Jadeja is a dynamic all-rounder who contributes with both bat and ball. Representing Chennai Super Kings, his quick fielding and match-winning performances make him a key player.",
        metadata={"team": "Chennai Super Kings"}
    )
docs = [doc1, doc2, doc3, doc4, doc5]

In [None]:
embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vector_store = Chroma(
    embedding_function = embedding_function,
    persist_directory='my_chroma_db',
    collection_name='sample'
    )

  embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  vector_store = Chroma(


In [None]:
# add documents
vector_store.add_documents(docs) # this ouput are unique id for every documnet

['82332718-c98d-4a01-9a40-0e274b739bd3',
 'e00ef0dc-a7c3-44fc-9e5a-51960d4caa9b',
 '85877880-a3f9-457e-87d8-9764f3d864aa',
 '7d6c5265-e54c-4b52-8a6a-2297c8eac247',
 'ddfbc117-2e37-416c-9573-1b33983c2e83']

In [None]:
# view documents
vector_store.get(include = ['embeddings','documents', 'metadatas'])

{'ids': ['82332718-c98d-4a01-9a40-0e274b739bd3',
  'e00ef0dc-a7c3-44fc-9e5a-51960d4caa9b',
  '85877880-a3f9-457e-87d8-9764f3d864aa',
  '7d6c5265-e54c-4b52-8a6a-2297c8eac247',
  'ddfbc117-2e37-416c-9573-1b33983c2e83'],
 'embeddings': array([[ 0.00994725,  0.06914335, -0.0514712 , ..., -0.0354334 ,
          0.01284813,  0.01248285],
        [ 0.00127746,  0.0312985 , -0.02375378, ..., -0.00518364,
         -0.03280616,  0.02737711],
        [-0.10265916,  0.02650809,  0.02271503, ..., -0.03359751,
         -0.07984945, -0.01507709],
        [ 0.02123393, -0.0246855 , -0.0449437 , ..., -0.1099581 ,
          0.00572559,  0.09915373],
        [ 0.01873975,  0.04382844, -0.04304259, ..., -0.07801618,
         -0.07840681, -0.00304193]]),
 'documents': ['Virat Kohli is one of the most successful and consistent batsmen in IPL history. Known for his aggressive batting style and fitness, he has led the Royal Challengers Bangalore in multiple seasons.',
  "Rohit Sharma is the most successful ca

In [None]:
# search documents
vector_store.similarity_search(
    query='Who among these are a bowler?',
    k=2  # no. of document u want to retrieve
)

[Document(metadata={'team': 'Mumbai Indians'}, page_content='Jasprit Bumrah is considered one of the best fast bowlers in T20 cricket. Playing for Mumbai Indians, he is known for his yorkers and death-over expertise.'),
 Document(metadata={'team': 'Mumbai Indians'}, page_content="Rohit Sharma is the most successful captain in IPL history, leading Mumbai Indians to five titles. He's known for his calm demeanor and ability to play big innings under pressure.")]

In [None]:
# search with similarity score
vector_store.similarity_search_with_score(
    query='Who among these are a bowler?',
    k=3
)

[(Document(metadata={'team': 'Mumbai Indians'}, page_content='Jasprit Bumrah is considered one of the best fast bowlers in T20 cricket. Playing for Mumbai Indians, he is known for his yorkers and death-over expertise.'),
  0.9693601727485657),
 (Document(metadata={'team': 'Mumbai Indians'}, page_content="Rohit Sharma is the most successful captain in IPL history, leading Mumbai Indians to five titles. He's known for his calm demeanor and ability to play big innings under pressure."),
  1.1493451595306396),
 (Document(metadata={'team': 'Chennai Super Kings'}, page_content='Ravindra Jadeja is a dynamic all-rounder who contributes with both bat and ball. Representing Chennai Super Kings, his quick fielding and match-winning performances make him a key player.'),
  1.1851273775100708)]

In [None]:
# meta-data filtering
vector_store.similarity_search_with_score(
    query="",
    filter={"team": "Chennai Super Kings"}
)

[(Document(metadata={'team': 'Chennai Super Kings'}, page_content='MS Dhoni, famously known as Captain Cool, has led Chennai Super Kings to multiple IPL titles. His finishing skills, wicketkeeping, and leadership are legendary.'),
  1.8436007499694824),
 (Document(metadata={'team': 'Chennai Super Kings'}, page_content='Ravindra Jadeja is a dynamic all-rounder who contributes with both bat and ball. Representing Chennai Super Kings, his quick fielding and match-winning performances make him a key player.'),
  1.8909369707107544)]

In [None]:
# update documents
updated_doc1 = Document(
    page_content="Virat Kohli, the former captain of Royal Challengers Bangalore (RCB), is renowned for his aggressive leadership and consistent batting performances. He holds the record for the most runs in IPL history, including multiple centuries in a single season. Despite RCB not winning an IPL title under his captaincy, Kohli's passion and fitness set a benchmark for the league. His ability to chase targets and anchor innings has made him one of the most dependable players in T20 cricket.",
    metadata={"team": "Royal Challengers Bangalore"}
)

vector_store.update_document(document_id="82332718-c98d-4a01-9a40-0e274b739bd3",document = updated_doc1)

In [None]:
# view documents
vector_store.get(include=['embeddings','documents', 'metadatas'])

{'ids': ['82332718-c98d-4a01-9a40-0e274b739bd3',
  'e00ef0dc-a7c3-44fc-9e5a-51960d4caa9b',
  '85877880-a3f9-457e-87d8-9764f3d864aa',
  '7d6c5265-e54c-4b52-8a6a-2297c8eac247',
  'ddfbc117-2e37-416c-9573-1b33983c2e83'],
 'embeddings': array([[-0.00233746,  0.05902081, -0.04774044, ..., -0.07264049,
          0.00276782, -0.00344088],
        [ 0.00127746,  0.0312985 , -0.02375378, ..., -0.00518364,
         -0.03280616,  0.02737711],
        [-0.10265916,  0.02650809,  0.02271503, ..., -0.03359751,
         -0.07984945, -0.01507709],
        [ 0.02123393, -0.0246855 , -0.0449437 , ..., -0.1099581 ,
          0.00572559,  0.09915373],
        [ 0.01873975,  0.04382844, -0.04304259, ..., -0.07801618,
         -0.07840681, -0.00304193]]),
 'documents': ["Virat Kohli, the former captain of Royal Challengers Bangalore (RCB), is renowned for his aggressive leadership and consistent batting performances. He holds the record for the most runs in IPL history, including multiple centuries in a sin

In [None]:
# delete document
vector_store.delete(ids=['09a39dc6-3ba6-4ea7-927e-fdda591da5e4'])