In [3]:
import os
from langgraph.prebuilt import create_react_agent
from langchain.chat_models import init_chat_model
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.messages import HumanMessage
from dotenv import load_dotenv
from langchain_community.tools import DuckDuckGoSearchResults

# 1. Environment and conversation memory
# load_dotenv() reads variables from a local .env file
# (presumably where the Groq API key lives -- TODO confirm).
load_dotenv()
memory = MemorySaver()
# Every call made with this config shares the same checkpoint thread.
config = {"configurable": {"thread_id": "scout_001"}}

# 2. Chat model: Llama 3.3 70B through the Groq provider, max_tokens capped at 4000
model = init_chat_model(
    "llama-3.3-70b-versatile",
    model_provider="groq",
    max_tokens=4000,
)

# 3. Search tool
# DuckDuckGo is the agent's only tool; it is used to look up recent
# competitor/product news, with results requested in "list" format.
web_search_tool = DuckDuckGoSearchResults(output_format="list")
tools = [web_search_tool]

# 4. The Scout: a ReAct-style agent that can reason and call the search tool
# NOTE(review): LangGraph reports create_react_agent as deprecated since V1.0 in
# favour of `langchain.agents.create_agent`; migrate once the installed
# langchain version is confirmed to provide it.
agent_executor = create_react_agent(model, tools, checkpointer=memory)

# 5. The Scout query
# The prompt asks for concrete technical facts plus one marketing gap.
scout_query = """
Search for the latest technical capabilities of 'Claude Coworker' by Anthropic. 
Identify 3 specific things it can do with a local filesystem that a standard chatbot cannot.
Then, identify a 'Marketing Gap'‚Äîsomething it doesn't handle well (like privacy concerns or specific industries).
"""

print("üïµÔ∏è‚Äç‚ôÇÔ∏è Scout Agent is searching for technical truths...")
scout_input = {"messages": [HumanMessage(content=scout_query)]}
for event in agent_executor.stream(scout_input, config):
    for value in event.values():
        # Only updates that carry a message list are of interest.
        if "messages" not in value:
            continue
        last_msg = value["messages"][-1]
        # Print the newest message of each step, skipping empty content.
        msg_text = getattr(last_msg, "content", None)
        if msg_text:
            print(f"\n--- Scout Progress ---\n{msg_text}")

/tmp/ipykernel_1564896/1103047228.py:24: LangGraphDeprecatedSinceV10: create_react_agent has been moved to `langchain.agents`. Please update your import to `from langchain.agents import create_agent`. Deprecated in LangGraph V1.0 to be removed in V2.0.
  agent_executor = create_react_agent(model, tools, checkpointer=memory)


üïµÔ∏è‚Äç‚ôÇÔ∏è Scout Agent is searching for technical truths...

--- Scout Progress ---
[{"snippet": "12 Jan 2026 ¬∑ Key capabilities. Direct local file access: Claude can read from and write to your local files without manual uploads or downloads. Sub-agent coordination ...", "title": "Getting started with Cowork | Claude Help Center", "link": "https://support.claude.com/en/articles/13345190-getting-started-with-cowork"}, {"snippet": "16 Jan 2026 ¬∑ It runs in a terminal and gives Claude the ability to read codebases, write files, and execute commands. ... locally, or use local files as input for external ...", "title": "Claude Cowork Tutorial: How to Use Anthropic's AI Desktop Agent", "link": "https://www.datacamp.com/tutorial/claude-cowork-tutorial"}, {"snippet": "12 Jan 2026 ¬∑ Cowork brings Claude Code's agentic capabilities to the Claude desktop app. Give Claude access to a folder, set a task, and let it work. Missing: local filesystem", "title": "Introducing Cowork: Claude Cod

In [8]:
import requests
from bs4 import BeautifulSoup
import os

def light_scout(
    url,
    day_name="Mon",
    intel_dir="/nuvodata/User_data/shiva/Market_carousal/intelligence",
):
    """Fetch one web page, strip boilerplate tags and save its text to disk.

    The page is downloaded with a browser-like User-Agent, parsed with the
    built-in ``html.parser``, and every ``script``, ``style``, ``nav``,
    ``footer`` and ``header`` element is removed before the text is extracted.
    The text is written to ``<intel_dir>/<day_name>_scout.txt``.

    Args:
        url: Address of the page to scout.
        day_name: Prefix of the output file name (default ``"Mon"``).
        intel_dir: Directory that receives the output file; it is created if
            missing. Defaults to the project's intelligence folder.

    Returns:
        The extracted text, or ``None`` if any step failed (the error is
        printed instead of raised).
    """
    print(f"üïµÔ∏è‚Äç‚ôÇÔ∏è Lightweight Scouting: {url}...")

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
    }

    try:
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()

        # Parse the HTML
        soup = BeautifulSoup(response.text, 'html.parser')

        # Drop non-content elements (scripts, styles, navigation, header/footer)
        for element in soup(["script", "style", "nav", "footer", "header"]):
            element.decompose()

        # Get clean text, one text node per line
        clean_text = soup.get_text(separator='\n')

        # Save to the intelligence folder
        intel_path = os.path.join(intel_dir, f"{day_name}_scout.txt")
        os.makedirs(os.path.dirname(intel_path), exist_ok=True)

        # Scraped pages routinely contain non-ASCII characters, so the file is
        # written as UTF-8 explicitly instead of the platform default encoding.
        with open(intel_path, "w", encoding="utf-8") as f:
            f.write(clean_text)

        print(f"‚úÖ Intelligence Saved: {intel_path}")
        return clean_text

    except Exception as e:
        # Best-effort helper: report the failure and signal it with None.
        print(f"‚ùå Light Scout failed: {e}")
        return None

# Usage: scout a single page. With the default day_name ("Mon") the extracted
# text is saved as Mon_scout.txt; the call also returns the text (or None on failure).
light_scout("https://www.anthropic.com/news/claude-3-5-sonnet")

üïµÔ∏è‚Äç‚ôÇÔ∏è Lightweight Scouting: https://www.anthropic.com/news/claude-3-5-sonnet...
‚úÖ Intelligence Saved: /nuvodata/User_data/shiva/Market_carousal/intelligence/Mon_scout.txt


'Announcements\nClaude 3.5 Sonnet\nJun 21, 2024\nTry on Claude.ai\nUpdate\nConsumer Terms and Privacy Policy\nAug 28, 2025\nToday, we‚Äôre launching Claude 3.5 Sonnet‚Äîour first release in the forthcoming Claude 3.5 model family. Claude 3.5 Sonnet raises the industry bar for intelligence, outperforming competitor models and Claude 3 Opus on a wide range of evaluations, with the speed and cost of our mid-tier model, Claude 3 Sonnet.\nClaude 3.5 Sonnet is now available for free on Claude.ai and the Claude iOS app, while Claude Pro and Team plan subscribers can access it with significantly higher rate limits. It is also available via the Anthropic \nAPI\n, \nAmazon Bedrock\n, and \nGoogle Cloud‚Äôs Vertex AI\n. The model costs $3 per million input tokens and $15 per million output tokens, with a 200K token context window.\nFrontier intelligence at 2x the speed\nClaude 3.5 Sonnet sets new industry benchmarks for graduate-level reasoning (GPQA), undergraduate-level knowledge (MMLU), and co

In [9]:
import requests
from bs4 import BeautifulSoup
import os
from concurrent.futures import ThreadPoolExecutor
from urllib.parse import urljoin

# --- CONFIG ---
# Folder that receives the scouted text files; created up front if it is missing.
INTEL_DIR = "/nuvodata/User_data/shiva/Market_carousal/intelligence"
os.makedirs(INTEL_DIR, exist_ok=True)

# A single shared Session so requests can reuse connections and all carry
# the same browser-like User-Agent header.
session = requests.Session()
session.headers["User-Agent"] = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36"
)

def optimized_fetch(url):
    """Fetch one URL and return the visible text of its main content area.

    The response body is parsed with the ``lxml`` parser (pip install lxml).
    The content container is chosen by trying the CSS selectors ``article``,
    ``main``, ``div.content`` and ``section`` in that priority order; when none
    of them matches, the ``<body>`` element (or the whole document if there is
    no body) is used instead. ``script``, ``style``, ``nav``, ``footer`` and
    ``header`` elements inside the chosen container are removed before the
    text is extracted.

    Args:
        url: Address of the page to fetch with the shared ``session``.

    Returns:
        The extracted text with whitespace-stripped fragments joined by single
        spaces. On any failure the function does not raise; it returns the
        string ``"Error: <message>"`` instead.
    """
    try:
        response = session.get(url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'lxml')

        # Target only the core content container. select_one() is used because
        # find() with a list treats each entry as a tag name: 'div.content'
        # would never match, and the first tag in document order would win
        # regardless of the intended priority.
        main_content = None
        for selector in ("article", "main", "div.content", "section"):
            main_content = soup.select_one(selector)
            if main_content is not None:
                break
        if main_content is None:
            # Fallback: whole body, or the document itself when no body exists
            main_content = soup.body if soup.body is not None else soup

        # Clean text: remove script/style/nav and page chrome
        for tag in main_content(["script", "style", "nav", "footer", "header"]):
            tag.decompose()

        return main_content.get_text(separator=' ', strip=True)
    except Exception as e:
        # Callers receive the failure as text rather than an exception.
        return f"Error: {e}"

def batch_scout(url_list, max_chars=4000, intel_dir=None):
    """Scout multiple competitor pages in parallel and save each result.

    Every URL is passed to ``optimized_fetch`` on a pool of five worker
    threads. Result number ``i`` (1-based, in the order of ``url_list``) is
    written to ``competitor_<i>.txt``. Because ``optimized_fetch`` reports
    failures as an ``"Error: ..."`` string, a failed fetch is saved as that
    error text rather than skipped.

    Args:
        url_list: Sequence of URLs to fetch.
        max_chars: Maximum number of characters written per file
            (default 4000), keeping the text short for the LLM.
        intel_dir: Output directory; defaults to the module-level
            ``INTEL_DIR``. It is created if missing.

    Returns:
        None. Progress is reported with ``print``.
    """
    target_dir = INTEL_DIR if intel_dir is None else intel_dir
    os.makedirs(target_dir, exist_ok=True)

    print(f"üöÄ Starting Parallel Scout for {len(url_list)} targets...")

    # Use 5 threads (don't go too high to avoid getting blocked)
    with ThreadPoolExecutor(max_workers=5) as executor:
        # map() yields results in the same order as url_list
        results = list(executor.map(optimized_fetch, url_list))

    for i, content in enumerate(results, start=1):
        path = os.path.join(target_dir, f"competitor_{i}.txt")
        # Explicit UTF-8: scraped text routinely contains non-ASCII characters.
        with open(path, "w", encoding="utf-8") as f:
            f.write(content[:max_chars])  # Limit to max_chars characters for Llama efficiency
        print(f"‚úÖ Saved: {path}")

# Example: scout two news pages in parallel. The results are written under
# INTEL_DIR as competitor_1.txt and competitor_2.txt, numbered in list order.
batch_scout(["https://openai.com/news", "https://www.anthropic.com/news"])

üöÄ Starting Parallel Scout for 2 targets...
‚úÖ Saved: /nuvodata/User_data/shiva/Market_carousal/intelligence/competitor_1.txt
‚úÖ Saved: /nuvodata/User_data/shiva/Market_carousal/intelligence/competitor_2.txt


In [41]:
import os
from langchain_text_splitters import MarkdownHeaderTextSplitter, RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# 1. Path Configuration
# Source knowledge file and the folder the FAISS index is saved to.
FILE_PATH = "/nuvodata/User_data/shiva/Market_carousal/knowledge.md"
FAISS_SAVE_PATH = "/nuvodata/User_data/shiva/Market_carousal/faiss_index"

# 2. Markdown Structural Splitting
# Each tuple pairs a Markdown header marker with the label used for that level.
headers_to_split_on = [
    ("#", "Header 1"),
    ("##", "Header 2"),
    ("###", "Header 3"),
]

# Read as UTF-8 explicitly: the knowledge file contains non-ASCII punctuation
# (e.g. em-dashes), so relying on the platform default encoding is fragile.
with open(FILE_PATH, 'r', encoding="utf-8") as f:
    markdown_content = f.read()

md_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
md_header_splits = md_splitter.split_text(markdown_content)

# 3. Fine-tuning Chunk Size (Optional but recommended)
# Header sections are further cut into chunks of at most 1000 characters
# with a 100-character overlap between neighbours.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
final_chunks = text_splitter.split_documents(md_header_splits)

# 4. Embeddings & FAISS Initialization
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Create FAISS vector store from the chunks
vector_store = FAISS.from_documents(final_chunks, embeddings)

# 5. Save the index locally
# This creates a folder containing the index and docstore mapping
vector_store.save_local(FAISS_SAVE_PATH)

print(f"FAISS index created with {len(final_chunks)} chunks and saved to {FAISS_SAVE_PATH}")

Loading weights: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 103/103 [00:00<00:00, 1354.25it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


FAISS index created with 17 chunks and saved to /nuvodata/User_data/shiva/Market_carousal/faiss_index


In [None]:
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

# Load the saved index, embedding queries with the all-MiniLM-L6-v2 model
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
index_dir = "/nuvodata/User_data/shiva/Market_carousal/faiss_index"
# NOTE(review): allow_dangerous_deserialization opts in to loading the saved
# index files, which (per the LangChain docs) involves unpickling. Only load
# an index that was created locally and is trusted.
new_db = FAISS.load_local(
    index_dir,
    embeddings,
    allow_dangerous_deserialization=True,
)

# Retrieve the two chunks most similar to the question
query = "What is Nueralogic's cost efficiency advantage?"
docs = new_db.similarity_search(query, k=2)

# Show each hit with a 1-based rank
for rank, doc in enumerate(docs, start=1):
    print(f"Result {rank}: {doc.page_content}\n")

Loading weights: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 103/103 [00:00<00:00, 1056.89it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


Result 1: * **Company Name:** Nueralogic
* **Website:** [https://nueralogic.com](https://nueralogic.com)
* **Positioning:** Execution-focused AI solutions and automation partner.
* **Core Description:** Nueralogic is an AI solutions company that helps enterprises design, build, and deploy production-grade AI systems that deliver measurable business outcomes. We specialize in custom AI models, intelligent automation, computer vision, LLM-powered systems, and data intelligence platforms.
* **The Nueralogic Difference:** Unlike traditional consultants, Nueralogic focuses on execution‚Äîshipping real systems that integrate directly into business workflows. We combine deep technical expertise with cost-efficient, high-tier engineering talent based in India, enabling clients to achieve ~40% cost savings without compromising quality, security, or scalability.  
---

Result 2: * **Execution over experimentation:** Production-grade AI, not just demos.
* **End-to-end ownership:** Strategy, build

In [40]:
import chromadb
# Reset Chroma's in-process client cache by emptying the class-level
# `_identifier_to_system` mapping on SharedSystemClient.
# NOTE(review): `_identifier_to_system` is a private attribute (leading
# underscore), so this presumably may break between chromadb versions -- confirm
# against the installed release before relying on it.
chromadb.api.shared_system_client.SharedSystemClient._identifier_to_system.clear()
print("üßπ Chroma System Cache Flushed.")

üßπ Chroma System Cache Flushed.
