In [None]:
!curl -fsSL https://ollama.com/install.sh | sh
!nohup ollama serve > output.log 2>&1 &
!ollama pull phi4

In [None]:
!pip install langchain langchain_community langchain_ollama chromadb beautifulsoup4 langchain-chroma

In [None]:
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain_chroma import Chroma
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Step 1: Scrape the webpage
def scrape_webpage(url):
    """Fetch a single web page and return the loader's documents.

    Parsing is restricted to elements with the ``content-area`` CSS class, so
    anything outside that container is ignored by the HTML parser.

    Args:
        url: Address of the page to load.

    Returns:
        Whatever ``WebBaseLoader.load()`` produces for the one given URL.
    """
    # Tell BeautifulSoup to build a tree only for the main content container.
    content_only = bs4.SoupStrainer(class_="content-area")
    return WebBaseLoader(
        web_paths=(url,),
        bs_kwargs={"parse_only": content_only},
    ).load()

# Step 2: Process and store data in vector store
def process_documents(docs):
    """Split documents into chunks, embed them with Ollama and index them in Chroma.

    Args:
        docs: Documents to index (e.g. the result of ``scrape_webpage``).

    Returns:
        A Chroma vector store holding the embedded chunks in the
        ``web_content`` collection.

    Raises:
        ValueError: If splitting yields no chunks (for example when the page
            filter matched nothing), since there would be nothing to retrieve.
    """
    # The langchain_community OllamaEmbeddings class is deprecated (it emits a
    # deprecation warning when instantiated); the maintained implementation lives in
    # langchain_ollama, which this notebook already installs and imports from.
    # Imported locally so this function does not depend on edits to the import cell.
    from langchain_ollama import OllamaEmbeddings

    # Split documents into smaller, overlapping chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200
    )
    splits = text_splitter.split_documents(docs)

    # Fail early with a clear message instead of building an empty index.
    if not splits:
        raise ValueError(
            "No text chunks to index: the loaded documents contain no content."
        )

    # Create embeddings using Ollama (requires a running Ollama server)
    embeddings = OllamaEmbeddings(model="phi4")

    # Store in Chroma vector store
    vectorstore = Chroma.from_documents(
        documents=splits,
        embedding=embeddings,
        collection_name="web_content"
    )
    return vectorstore

# Step 3: Set up RAG pipeline for real-time querying
def setup_rag_pipeline(vectorstore):
    """Build a retrieval-augmented generation chain on top of a vector store.

    The chain takes a question string, retrieves matching chunks from
    ``vectorstore``, inserts their text into the prompt and returns the model's
    answer as a plain string.

    Args:
        vectorstore: Vector store exposing ``as_retriever()``.

    Returns:
        A runnable chain; call ``.invoke(question)`` to get the answer text.
    """
    # Initialize Phi-4 model via Ollama
    llm = ChatOllama(
        model="phi4",
        base_url="http://localhost:11434",
        temperature=0
    )

    # Define prompt template
    template = """Answer the question based only on the following context:
    {context}

    Question: {question}
    """
    prompt = ChatPromptTemplate.from_template(template)

    # Create retriever from vector store
    retriever = vectorstore.as_retriever()

    def _format_docs(docs):
        """Join the retrieved chunks' text into one context string."""
        return "\n\n".join(doc.page_content for doc in docs)

    # Build RAG chain. The retriever output is piped through _format_docs so the
    # prompt receives the chunk text itself rather than the repr of a list of
    # Document objects (which would also leak metadata into the context).
    rag_chain = (
        {"context": retriever | _format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    return rag_chain

# Step 4: Main execution
def main():
    """Run the demo end to end: scrape, index, query, print, clean up.

    The vector store collection is deleted even when building the chain or
    running the query fails, so a failed run does not leave a stale collection
    behind for the next one.
    """
    # Example URL to scrape (replace with your target URL)
    url = "https://pythonology.eu/using-pandas_ta-to-generate-technical-indicators-and-signals/"

    # Scrape webpage
    print("Scraping webpage...")
    docs = scrape_webpage(url)

    # Process and store in vector store
    print("Processing and storing data...")
    vectorstore = process_documents(docs)

    try:
        # Set up RAG pipeline
        print("Setting up RAG pipeline...")
        rag_chain = setup_rag_pipeline(vectorstore)

        # Example real-time query
        query = "What are the main technical indicators mentioned in the article?"
        print("Executing query:", query)
        response = rag_chain.invoke(query)
        print("\nResponse:", response)
    finally:
        # Clean up vector store regardless of whether the query succeeded
        vectorstore.delete_collection()

if __name__ == "__main__":
    main()



Scraping webpage...
Processing and storing data...


  embeddings = OllamaEmbeddings(model="phi4")


ValueError: Error raised by inference endpoint: HTTPConnectionPool(host='localhost', port=11434): Max retries exceeded with url: /api/embeddings (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7a800dea7e90>: Failed to establish a new connection: [Errno 111] Connection refused'))

In [None]:
# Reuse scrape_webpage() from the pipeline cell above instead of redefining it here.
# This cell already relies on that cell's imports (bs4, WebBaseLoader), and a second
# identical definition would silently shadow the first and could drift out of sync.
doc = scrape_webpage('https://pythonology.eu/using-pandas_ta-to-generate-technical-indicators-and-signals/')

In [None]:
# Inspect the scraped result; as the cell's last expression its repr is displayed.
doc

[Document(metadata={'source': 'https://pythonology.eu/using-pandas_ta-to-generate-technical-indicators-and-signals/', 'title': 'Previous'}, page_content="Disclaimer:Remember that trading involves risk, and most traders lose money. This blog post is for educational purposes only. Always practice risk management and consider seeking professional advice before trading.Table of ContentsDisclaimer:What Is Algorithmic Trading? Advantages of Algorithmic TradingDisadvantages of Algorithmic TradingUsing Python for Algorithmic TradingTechnical Analysis with PythonPlotting Simple Moving Averages (SMA)Plotting RSIA Simple Trading StrategyConclusionWhat Is Algorithmic Trading?Algorithmic trading, often referred to as algo trading, uses computer algorithms to automate trading decisions based on predefined criteria. These criteria are set by the traders, for example, you might tell the program to Buy when the last 5 candlesticks have an upward trend, or Sell when the last 5 candlesticks are following

In [None]:
# Split the scraped documents into overlapping chunks, using the same splitter
# settings as process_documents(), so the intermediate result can be inspected.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(doc)

In [None]:
# Inspect the chunks produced by the splitter in the previous cell.
splits

[Document(metadata={'source': 'https://pythonology.eu/using-pandas_ta-to-generate-technical-indicators-and-signals/', 'title': 'Previous'}, page_content='Disclaimer:Remember that trading involves risk, and most traders lose money. This blog post is for educational purposes only. Always practice risk management and consider seeking professional advice before trading.Table of ContentsDisclaimer:What Is Algorithmic Trading? Advantages of Algorithmic TradingDisadvantages of Algorithmic TradingUsing Python for Algorithmic TradingTechnical Analysis with PythonPlotting Simple Moving Averages (SMA)Plotting RSIA Simple Trading StrategyConclusionWhat Is Algorithmic Trading?Algorithmic trading, often referred to as algo trading, uses computer algorithms to automate trading decisions based on predefined criteria. These criteria are set by the traders, for example, you might tell the program to Buy when the last 5 candlesticks have an upward trend, or Sell when the last 5 candlesticks are following