In [17]:
%pip install -q youtube-transcript-api langchain-community faiss-cpu tiktoken python-dotenv

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from urllib.parse import urlparse, parse_qs

In [None]:
def extract_youtube_id(url):
    parsed_url = urlparse(url)
    query_params = parse_qs(parsed_url.query)
    video_id = query_params.get('v', [None])[0]
    return video_id

url = "https://www.youtube.com/watch?v=Mi0QycA81go"
video_id = extract_youtube_id(url)

try:
    transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['en', 'hi'])
    
    transcript = " ".join(chunk['text'] for chunk in transcript_list)
    print(transcript)
    
except TranscriptsDisabled: 
    print("Transcripts are disabled for this video.")

In [23]:
transcript_list

[{'text': 'Hi everyone and welcome to a new video.',
  'start': 0.0,
  'duration': 4.72},
 {'text': 'This one is around trading arbitragees',
  'start': 2.399,
  'duration': 4.4},
 {'text': 'in sports book. Um that exists for',
  'start': 4.72,
  'duration': 5.12},
 {'text': 'markets like IPL trading. Um basically',
  'start': 6.799,
  'duration': 6.8},
 {'text': "it's not closer to a MPL or a dream 11.",
  'start': 9.84,
  'duration': 5.919},
 {'text': 'Those are some markets to trade on you',
  'start': 13.599,
  'duration': 5.241},
 {'text': 'know sports opinions in India.',
  'start': 15.759,
  'duration': 5.921},
 {'text': 'Um this is not that um there are other',
  'start': 18.84,
  'duration': 5.4},
 {'text': 'markets that exist where people can',
  'start': 21.68,
  'duration': 4.4},
 {'text': 'trade on the final outcome of the match.',
  'start': 24.24,
  'duration': 3.84},
 {'text': 'Is the match going to win? Is Kolkata',
  'start': 26.08,
  'duration': 3.279},
 {'text': 'go

# Text Splitter

In [24]:
splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)
chunks = splitter.create_documents([transcript])

In [25]:
len(chunks)

167

In [27]:
chunks[0]

Document(metadata={}, page_content='Hi everyone and welcome to a new video. This one is around trading arbitragees in sports book. Um that exists for markets like IPL trading. Um basically it\'s not closer to a MPL or a dream 11. Those are some markets to trade on you know sports opinions in India. Um this is not that um there are other markets that exist where people can trade on the final outcome of the match. Is the match going to win? Is Kolkata going to win? Is Kolkata going to lose? Um when you do that uh you are taking some risk. If you say Kolkata is going to win and Kolkata loses, you lose all of your money. You could bet 100 rupees, a 1,000 rupees or a lakh. Um but is there a way to make risk-free money from these markets? Um you might say, "No, that sounds too good to be true." Um or that sounds like a fatty scheme. Um but if you heard of HFTs, that\'s traditionally what they\'ve been doing for years. Um they are squeezing out inefficiencies from markets without taking any r

In [28]:
embeddings = OllamaEmbeddings(
    model="mistral",
)
vector_store = FAISS.from_documents(chunks, embeddings)

In [29]:
vector_store.index_to_docstore_id

{0: '7e7bcc98-bf95-4dba-adb8-783eb02e784d',
 1: '9c2772f7-a9c5-40f7-9510-3708a9d699c1',
 2: '1dd785d7-8eeb-4d08-a689-e7948cfabcb5',
 3: 'f033e777-f85b-48a6-94f0-baf36b7c8de6',
 4: 'e7931c92-e8e3-4c3c-9870-886f818671ed',
 5: 'c69c266b-b2ad-4238-b4cd-19a4fb54af77',
 6: 'e457d2e0-3f52-40b6-9294-ad8ffc2c6b56',
 7: '2a0ca562-268c-4193-8cc3-cee9add1c82c',
 8: '51b3866a-670a-43de-b8fa-81767b8e38d6',
 9: '36080cb2-12a0-43ad-a1be-33510dddcbb7',
 10: 'c9d141cb-4dfd-4362-adfa-4655cdb6c4a4',
 11: '758029c6-fac3-49c5-9917-43e802534572',
 12: '32201b22-6331-429b-a8bf-928bca4c7468',
 13: 'c93104f6-4007-4ba9-8522-9e95dce9b64d',
 14: 'd37c9cf4-cfce-4cc2-a48f-776a0a8e1d1d',
 15: '3744af71-3334-4450-ba70-fa908f0ccbe6',
 16: '04370e2f-fb12-46a5-abe0-a27891e47aaf',
 17: 'add2b6b7-20b3-4017-a208-f0581fc53736',
 18: 'aea553cf-decc-40f4-b82e-521a5526daad',
 19: '461070ab-ba70-4045-93ae-bc7507c63212',
 20: '6c22dc7d-c8f0-435d-abfd-3d8bc25b2781',
 21: '35f6a8ed-3484-4502-8667-ef3d934f2fe0',
 22: '470a2d11-c5f3-

# Retrieval

In [30]:
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k":4})

In [31]:
retriever

VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000197E79363C0>, search_kwargs={'k': 4})

In [32]:
retriever.invoke("Is this chrome extension locally installed?")

[Document(id='cb17cec1-a511-458f-a24e-eb1ac55868d9', metadata={}, page_content="and five. Um, I'm able to place an order over here. Why can't I place the same order over here? 735. I'm trying to buy. Um, create order. Token ID. Create order. Options. Nope. User order. Token ID, price, size, side. Uh, yeah, I don't see anything wrong there. Let's try it one more time. Uh, not enough balance or allowance. Uh I am able to place the order from here. Do I need to allow my APIs my APIs to have some allowance uh on this account or something like that? That's my guess. Um or I'm just not signing the message correctly. Um that's the other guess. It's probably no that shouldn't be the case. I say I was able to get derive the private key the API keys. Um so the serer object is correct. I would assume that's my last guess that the signer object is incorrect. The Ethereum private key should be in a different format and not a string. Um that's my guess. That's the only thing that that might be wrong

# Augmentation

In [33]:
llm = ChatOllama(model="mistral", temperature=0.2)

In [39]:
prompt = PromptTemplate(
    template="""
    You are a helpful assistant.
    Answer only from the provided transcript context.
    If the context is insufficient, just say you don't know.
    
    {context}
    question: {question}
    """,
    input_variables=['context', 'question']
)

In [40]:
question = "is the bot working locally? If yes tell me the basic steps taken to create it."
retrived_docs = retriever.invoke(question)

In [42]:
context_text = "\n\n".join(doc.page_content for doc in retrived_docs)

In [43]:
final_prompt = prompt.invoke({"context":context_text, "question":question})

# Generation

In [44]:
answer = llm.invoke(final_prompt)
print(answer.content)

 The bot is being developed locally, and here are the basic steps taken to create it:

1. Set up a development environment for Solidity ( Ethereum's programming language) and Truffle Suite (a popular development framework for Ethereum).

2. Write the smart contract code using Solidity. In this case, the contract is for managing USDC on the Polygon chain. The contract includes functions like `approve` to allow a specific contract to debit from the user's wallet.

3. Compile and deploy the contract to the local Ethereum network (e.g., Ganache) using Truffle.

4. Implement the frontend for interacting with the smart contract, which includes placing orders and managing the market. This is typically done using a web3.js library or a React-based frontend.

5. Integrate APIs to fetch market data and manage user accounts if needed.

6. Test the bot locally to ensure it works as expected before deploying it to the mainnet or testnet.


# Chains

In [45]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [46]:
def format_docs(docs):
    context_text = "\n\n".join(doc.page_content for doc in retrived_docs)
    return context_text

In [47]:
parallel_chain = RunnableParallel({
    'context': retriever | RunnableLambda(format_docs),
    'question': RunnablePassthrough()
})

In [49]:
parallel_chain.invoke('What is video all about ?')

{'context': "the if you look at the main function you get the user's wallet you get the chain which in our case will be the polygon chain. um we don't need a bunch of these contracts. you only need the USDC contract because we're not using a custom token to you know um to uh over here in poly market I'm not using a custom token I'm using USDC um and hence I don't have to worry about the custom token here just the USDC contract and you can call USDC do approve u to approve a certain amount which in our case is you know I don't know where the amount is sent oh there you go basically all of our USDC balance I am allowing for a specific speific contract um to be able to debit. This is what this lines of code does. If you've ever done ERC20 generally um if you look at ERC20 uh solidity code you'll find um in the ERC20 spec or even if you look at the contract there's an allowance function uh ERC20 create [Music] XRC20 spec [Music] There is expected to be an approve function and an allowance 

In [50]:
parser = StrOutputParser()

In [51]:
main_chain = parallel_chain | prompt | llm | parser

In [52]:
main_chain.invoke("Can you tell me a quick summary?")

' The user is discussing a system where they act as a market maker in a Polygon chain-based market using USDC. They need to approve the contract for a specific amount, which is their entire USDC balance. The system involves an array structure for buy and sell orders, with each side being a series of key-value pairs representing the quantity available at different prices. There seems to be an issue with placing the same order in two places, possibly due to insufficient balance or allowance, or incorrect signing of messages. The user suspects that the signer object might be incorrect and should be in a different format from a string.'