In [None]:
# Prints a fixed greeting; this cell uses nothing from the rest of the notebook.
print("hello")

hello


In [None]:
# # Core Langchain and LangGraph packages
# !pip install langchain langgraph langchain_core langchain_community

# # Model providers and integrations
# !pip install langchain_openai langchain_groq langchain_huggingface

# # Utilities and tools
# !pip install faiss-cpu python-dotenv tqdm chromadb duckduckgo


In [None]:
from langgraph.graph import StateGraph
from langchain_core.messages import HumanMessage
from typing_extensions import TypedDict
from typing import Annotated
from langgraph.graph.message import add_messages

from typing import TypedDict, List

# Define the state
from typing import Optional

from typing import Union  # used only by the `article` annotation below


class NewsState(TypedDict):
    """Shared state read and updated by the nodes of the news pipeline.

    Each node function receives this mapping and returns a dict containing
    only the keys it wants to update. The example invocation supplies just
    ``messages``; every other key is produced by a node while the graph runs,
    which is why nodes read most keys with ``state.get(...)``.
    """

    # Input messages; receive_topic reads the content of the last one.
    messages: list
    # Topic text taken from the last message.
    topic: str
    # Search hits as dicts with "title", "content" and "url" keys
    # (built by fetch_news, pruned by filter_irrelevant).
    raw_news: List[dict]
    # LLM summary of raw_news, written by summarize_news.
    summary: str
    # Starts as the list of fetched content strings (fetch_news) and is later
    # replaced by the generated article text (generate_article).
    article: Union[List[str], str]
    # Originality ratings written by check_originality.
    originality_scores: Optional[List[float]]
    # Sentiment label returned by the LLM in analyze_sentiment.
    sentiment: Optional[str]
    # Single category (string summary) or list of categories (list summary),
    # written by categorize_article.
    category: Optional[str]
    categories: Optional[List[str]]
    # Free-text entity listing returned by the LLM in extract_entities
    # (stored as one string, not split into items).
    entities: Optional[str]
    # Headline produced by generate_title.
    title: Optional[str]
    # Number of times fetch_news has run; used to cap retry loops.
    loop_count: Optional[int]
    # Proofread "Title + article" text from compile_and_check_article.
    final_article: Optional[str]
    # Target language entered by the user in get_user_language_choice.
    language: Optional[str]
    # Output of translate_article.
    translated_article: Optional[str]


ModuleNotFoundError: No module named 'langgraph'

In [None]:
import os
import warnings

try:
    from google.colab import userdata
except ImportError:
    # Not running inside Colab: only environment variables are available.
    userdata = None


def _lookup_secret(name):
    """Return the API key called `name`, or None when it cannot be found.

    The Colab secret store is tried first (when the notebook runs in Colab);
    if the secret does not exist there, or Colab is unavailable, the value of
    the environment variable with the same name is used instead.
    """
    if userdata is not None:
        try:
            value = userdata.get(name)
        except userdata.SecretNotFoundError:
            value = None
        if value:
            return value
    return os.environ.get(name)


GROQ_API_KEY = _lookup_secret("GROQ_API_KEY")
TAVILY_API_KEY = _lookup_secret("TAVILY_API_KEY")

# Export the keys for the client libraries. os.environ rejects None, so a
# missing key is reported with a warning instead of being assigned.
for _key_name, _key_value in (
    ("GROQ_API_KEY", GROQ_API_KEY),
    ("TAVILY_API_KEY", TAVILY_API_KEY),
):
    if _key_value:
        os.environ[_key_name] = _key_value
    else:
        warnings.warn(
            f"{_key_name} was not found in Colab secrets or the environment; "
            "cells that need it will fail until it is provided."
        )

SecretNotFoundError: Secret GROQ_API_KEY does not exist.

In [None]:
# Node 1: Receive Topic
def receive_topic(state: NewsState):
    """Pull the requested topic out of the most recent message.

    Reads the ``content`` attribute of the last entry in
    ``state["messages"]`` and returns it as the ``topic`` state update.
    """
    latest_message = state["messages"][-1]
    return dict(topic=latest_message.content)


In [None]:
from langchain_community.tools.tavily_search import TavilySearchResults

# Tavily search tool invoked by fetch_news: at most 5 results per query,
# "advanced" search depth, raw page content requested.
tool = TavilySearchResults(max_results=5, search_depth="advanced", include_raw_content=True)

# # Directly call the tool
# def fetch_news(state: NewsState):
#     query = state["topic"]
#     # The result of tool.invoke is a list of dictionaries
#     search_results_list = tool.invoke({"query": query})
#     print("Raw Tavily Response:", search_results_list)

#     processed_news = []
#     # Iterate directly over the list returned by the tool
#     for item in search_results_list:
#         processed_news.append({
#             "title": item.get("title", "No title"),
#             "content": item.get("content", ""),
#             "url": item.get("url", "")
#         })

#     # The __artifacts should store the raw list, not an item from the loop
#     return {
#         "raw_news": processed_news,
#         "__artifacts": {
#             "search_query": state["topic"],
#             "search_results": search_results_list # Store the full list here
#         }
#     }


# from langchain_community.tools.tavily_search import TavilySearchResults

# tool = TavilySearchResults(
#     max_results=5,
#     search_depth="advanced",
#     include_raw_content=True
# )

def fetch_news(state: NewsState) -> dict:
    """Search for the current topic and stage the hits in the state.

    Reads:
        state["topic"]: the search query.
        state["loop_count"]: how many fetches already ran (missing or None
            counts as 0).

    Returns a state update with:
        raw_news: one dict per hit with "title", "content" and "url".
        article: the "content" strings of the same hits, in order.
        loop_count: the previous count plus one.
        __artifacts: the query and the unprocessed tool response.
            NOTE(review): this key is not declared in NewsState — confirm
            how the graph treats undeclared keys.
    """
    query = state["topic"]
    # loop_count is Optional in the schema, so a stored None must not reach
    # the "+ 1" below.
    loop_count = state.get("loop_count") or 0

    search_results_list = tool.invoke({"query": query})
    print("Raw Tavily Response:", search_results_list)

    # Only a list of dicts can be processed. NOTE(review): the tool appears
    # able to hand back something else (e.g. an error message string) when
    # the search fails — confirm against the tool's documentation. Such a
    # response yields no hits instead of raising AttributeError below.
    if isinstance(search_results_list, list):
        hits = [item for item in search_results_list if isinstance(item, dict)]
    else:
        hits = []

    processed_news = []
    article_texts = []

    for item in hits:
        # A hit may carry an explicit None content; normalise it to "" so
        # later string joins and length checks do not fail.
        content = item.get("content") or ""
        processed_news.append({
            "title": item.get("title", "No title"),
            "content": content,
            "url": item.get("url", "")
        })
        article_texts.append(content)

    return {
        "raw_news": processed_news,
        "article": article_texts,
        "loop_count": loop_count + 1,
        "__artifacts": {
            "search_query": query,
            "search_results": search_results_list
        }
    }


In [None]:
## 2. Filter Node
def filter_irrelevant(state: NewsState) -> dict:
    """Drop news items whose content is missing or not longer than 50 characters.

    Reads ``raw_news`` (an absent key counts as an empty list), prints how
    many items survived, and returns the survivors as the new ``raw_news``.
    """
    candidates = state.get("raw_news", [])

    kept = []
    for entry in candidates:
        text = entry.get("content")
        # Missing, None or empty content is rejected before measuring length.
        if not text:
            continue
        if len(text) > 50:
            kept.append(entry)

    print(f"Filtered: {len(kept)} / {len(candidates)}")

    return {"raw_news": kept}

In [None]:
# # === Router for Filter Node ===
# def route_filter(state: NewsState) -> str:
#     news_count = len(state.get("raw_news", []))
#     if news_count < 5:
#         print("News count < 5, looping back to search.")
#         return "FetchNews"
#     return "SummarizeNews"
def route_filter(state: NewsState) -> bool:
    """Decide whether enough news survived filtering to move on.

    Returns True to continue and False to fetch again. The graph wiring maps
    True to "CheckOriginality" and False to "FetchNews".

    The search returns at most 5 hits, so requiring 5 survivors means a
    single filtered item would otherwise send the graph back to FetchNews
    forever with the same query. After 3 fetches (tracked in ``loop_count``,
    the same cap originality_checker uses) the pipeline proceeds with
    whatever is available.
    """
    min_items = 5
    max_loops = 3

    # Both keys may be absent or None early in the run.
    news_count = len(state.get("raw_news") or [])
    loop_count = state.get("loop_count") or 0

    if news_count >= min_items:
        print("Routing to CheckOriginality")
        return True

    if loop_count >= max_loops:
        print("Too many loops, proceeding with the news available.")
        return True

    print("Routing back to FetchNews")
    return False


In [None]:
# Check Originality
def check_originality(state: NewsState) -> dict:
    """Ask the LLM for a 1-10 originality rating of the fetched article text.

    Reads ``article`` (an absent key counts as "") and returns
    ``{"originality_scores": [score]}`` with the rating as a float, or an
    empty list when no number can be found in the reply.

    The raw reply is not stored: it is free text, and the routing step
    converts each stored score with ``float()``, which fails on anything that
    is not a bare number.
    """
    import re  # local import: only this node needs it

    article = state.get('article', "")
    prompt = (
        f"Rate the originality of this article from 1 to 10:\n{article}\n\n"
        "Respond with only the number."
    )
    reply = llm.invoke(prompt).content.strip()
    print("Score from LLM:", reply)

    # Remove any <think>...</think> block so digits inside it are not
    # mistaken for the rating. NOTE(review): assumes the configured model may
    # emit such blocks in its content — confirm.
    visible = re.sub(r"<think>.*?</think>", "", reply, flags=re.DOTALL)

    # Take the first number in the reply; "7/10" therefore yields 7.
    match = re.search(r"\d+(?:\.\d+)?", visible)
    if match is None:
        return {"originality_scores": []}
    return {"originality_scores": [float(match.group())]}



# def originality_checker(state: NewsState) -> str:
#     scores = state.get("originality_scores", [])
#     print("Originality scores:", scores)

#     valid_scores = []
#     for score in scores:
#         try:
#             valid_scores.append(float(score))
#         except (ValueError, TypeError):
#             continue

#     if valid_scores and max(valid_scores) > 5.0:
#         print("Routing to: SummarizeNews")
#         return True

#     print("Routing to: FetchNews")
#     return False

def originality_checker(state: NewsState) -> bool:
    """Route after the originality check.

    Returns True to continue to summarisation and False to fetch again; the
    graph wiring maps these booleans to node names.

    True is returned when ``loop_count`` has reached 3 (retry cap) or when
    the highest score that converts to a float is above 5.0. Scores that do
    not convert are ignored. Missing or None ``originality_scores`` and
    ``loop_count`` are treated as an empty list and 0.
    """
    scores = state.get("originality_scores") or []
    loop_count = state.get("loop_count") or 0
    print("Originality scores:", scores, "Loop:", loop_count)

    # Stop looping after 3 tries
    if loop_count >= 3:
        print("Too many loops, proceeding to summarize anyway.")
        return True

    valid_scores = []
    for score in scores:
        try:
            valid_scores.append(float(score))
        except (ValueError, TypeError):
            # Not a number (free text, None, ...): skip it.
            continue

    return bool(valid_scores) and max(valid_scores) > 5.0



In [None]:
# Node 3: Summarize News
from langchain_groq import ChatGroq
# Shared Groq chat model; every LLM-backed node in this notebook calls
# llm.invoke(prompt) and reads .content from the reply.
llm = ChatGroq(model="DeepSeek-R1-Distill-Llama-70b", temperature=0.7, streaming=True, verbose=True)
def summarize_news(state: NewsState):
    """Summarise the filtered news with the LLM.

    Interpolates ``state['raw_news']`` into the prompt as-is and returns the
    stripped reply text as the ``summary`` state update.
    """
    news_items = state['raw_news']
    response = llm.invoke(f"Summarize this news:\n{news_items}")
    return {"summary": response.content.strip()}


In [None]:
# Analyze Sentiment
def analyze_sentiment(state: NewsState):
    """Classify the sentiment of the current ``article`` with the LLM.

    Returns ``{"sentiment": <stripped reply>}``, or a fixed placeholder when
    ``article`` is missing or empty.
    """
    article = state.get('article')
    # Guard clause: nothing to classify.
    if not article:
        return {"sentiment": "No article provided"}

    question = (
        "Classify the sentiment of this news article as Positive, Negative, or Neutral:\n"
        f"{article}"
    )
    reply = llm.invoke(question)
    return {"sentiment": reply.content.strip()}

In [None]:
# Categorize Article
def categorize_article(state: NewsState):
    """Ask the LLM to categorise the summary.

    A string summary yields ``{"category": <reply>}``; a list of summaries
    yields ``{"categories": [<reply>, ...]}`` with one reply per element.
    Any other value (including a missing summary) yields a fixed placeholder
    under ``category``.
    """
    def ask_category(text):
        # One LLM round-trip per summary text; the reply is stripped.
        question = f"What is the category of this summary? Choose from: Politics, Tech, Sports, Economy, Health, Entertainment.\n{text}"
        return llm.invoke(question).content.strip()

    summary = state.get('summary')

    if isinstance(summary, str):
        category = ask_category(summary)
        print("Category:", category)
        return {"category": category}

    if isinstance(summary, list):
        categories = [ask_category(part) for part in summary]
        print("Categories:", categories)
        return {"categories": categories}

    return {"category": "No summary provided"}

In [None]:
# Extract Entities
def extract_entities(state: NewsState):
    """Ask the LLM to list the named entities found in ``article``.

    Returns the stripped reply text under ``entities``, or a fixed
    placeholder string when ``article`` is missing or empty.
    """
    article = state.get("article")
    # Guard clause: nothing to analyse.
    if not article:
        return {"entities": "No article available for entity extraction"}

    request = (
        "Extract all named entities (people, places, organizations) from this article:\n"
        f"{article}"
    )
    response = llm.invoke(request).content.strip()
    print("Entities:", response)
    return {"entities": response}

In [None]:
# Generate Title
def generate_title(state: NewsState):
    """Ask the LLM for a headline based on ``summary``.

    The summary is interpolated as-is (a missing summary appears as
    ``None`` in the prompt). Returns the stripped reply under ``title``.
    """
    headline_request = "Generate a catchy headline for this article:\n" + f"{state.get('summary')}"
    reply = llm.invoke(headline_request)
    title = reply.content.strip()
    print("Generated Title:", title)
    return {"title": title}

In [None]:
# Generate Article
def generate_article(state: NewsState):
    """Write a full news article from ``state['summary']`` with the LLM.

    Returns the stripped article text under ``article`` and the same text
    wrapped in a single HumanMessage under ``messages``.
    """
    summary = state['summary']
    reply = llm.invoke(f"Write a news article based on this summary:\n{summary}")
    article = reply.content.strip()
    print("Generated Article:", article)

    # The message list carries a HumanMessage object rather than a raw dict.
    article_message = HumanMessage(content=article)
    return {"article": article, "messages": [article_message]}

In [None]:
def compile_and_check_article(state: NewsState):
    """Combine title and article and have the LLM proofread the result.

    ``article`` may be a list of strings (joined with blank lines) or a
    single string. Missing ``title`` / ``article`` keys default to "".
    Returns the stripped LLM reply under ``final_article``.
    """
    raw_article = state.get("article", "")
    # A list of texts is flattened into one string before prompting.
    article_text = "\n\n".join(raw_article) if isinstance(raw_article, list) else raw_article

    full_text = f"Title: {state.get('title', '')}\n\n{article_text}"

    # The prompt text (including its indentation) is kept exactly as-is,
    # since it is sent to the model verbatim.
    prompt = f"""Please proofread and correct any spelling or grammar mistakes in the following article. Keep the structure and tone intact.

              --- ARTICLE START ---

              {full_text}

              --- ARTICLE END ---

              Return the cleaned-up article below:"""

    reply = llm.invoke(prompt)
    return {"final_article": reply.content.strip()}


In [None]:
from langchain_community.tools import HumanInputRun

# Tool instance whose run(prompt) is called by get_user_language_choice to
# ask the user for a value.
# NOTE(review): presumably blocks until the user types a reply — confirm.
human_input = HumanInputRun()

def get_user_language_choice(state: NewsState):
    """Ask the user which language the article should be translated into.

    Returns whatever ``human_input.run`` yields, unchanged, under
    ``language``. The incoming state is not read.
    """
    question = "Enter the language to translate the article into:"
    return {"language": human_input.run(question)}


In [None]:
def translate_article(state: NewsState):
    """Translate ``final_article`` into the language chosen by the user.

    Reads:
        language: target language; defaults to "English" when the key is
            absent. Surrounding whitespace is removed before use.
        final_article: text to translate; defaults to "".

    Returns ``{"translated_article": <stripped LLM reply>}``. When the
    language is None or blank, the LLM is not called and ``final_article``
    is returned unchanged.
    """
    final_article = state.get("final_article", "")
    language = state.get("language", "English")

    # A missing or blank language (e.g. the user just pressed Enter) gives the
    # model nothing to translate into, so skip the call instead of sending a
    # prompt with an empty target.
    if language is None or not str(language).strip():
        return {"translated_article": final_article}
    language = str(language).strip()

    # The prompt body is kept exactly as written; it is sent verbatim.
    prompt = f"""Translate the following article into {language}:

  --- ARTICLE START ---
  {final_article}
  --- ARTICLE END ---

  Return the full translation below:"""

    translated = llm.invoke(prompt).content.strip()
    return {"translated_article": translated}


In [None]:
graph_builder = StateGraph(NewsState)

# Register each pipeline step under the node name that the edge definitions
# refer to. Registration order matches the original listing.
for _node_name, _node_fn in (
    ("ReceiveTopic", receive_topic),
    ("FetchNews", fetch_news),
    ("FilterNews", filter_irrelevant),
    ("CheckOriginality", check_originality),
    ("SummarizeNews", summarize_news),
    ("AnalyzeSentiment", analyze_sentiment),
    ("CategorizeArticle", categorize_article),
    ("ExtractEntities", extract_entities),
    ("GenerateTitle", generate_title),
    ("GenerateArticle", generate_article),
    ("CompileAndCheckArticle", compile_and_check_article),
    ("GetUserLanguageChoice", get_user_language_choice),
    ("TranslateArticle", translate_article),
):
    graph_builder.add_node(_node_name, _node_fn)


# --- Edges -----------------------------------------------------------------
# Linear start: topic -> fetch -> filter.
graph_builder.set_entry_point("ReceiveTopic")
graph_builder.add_edge("ReceiveTopic", "FetchNews")
graph_builder.add_edge("FetchNews", "FilterNews")

# Both routers return a bool; the path maps translate it into a node name.
# False sends the run back to FetchNews for another attempt.
graph_builder.add_conditional_edges("FilterNews", route_filter, {True: "CheckOriginality", False: "FetchNews"})
graph_builder.add_conditional_edges("CheckOriginality", originality_checker, {True: "SummarizeNews", False: "FetchNews"})

# Fan-out: the four analysis steps all start from the summary.
graph_builder.add_edge("SummarizeNews", "AnalyzeSentiment")
graph_builder.add_edge("SummarizeNews", "CategorizeArticle")
graph_builder.add_edge("SummarizeNews", "ExtractEntities")
graph_builder.add_edge("SummarizeNews", "GenerateTitle")

# Joins: a list of start nodes makes the target wait for ALL of them.
# With one edge per source, CompileAndCheckArticle was triggered once by
# GenerateTitle (before GenerateArticle had replaced the fetched texts with
# the written article) and again by GenerateArticle, so it and every node
# after it, including the language prompt, ran twice.
graph_builder.add_edge(["AnalyzeSentiment", "CategorizeArticle", "ExtractEntities"], "GenerateArticle")
graph_builder.add_edge(["GenerateTitle", "GenerateArticle"], "CompileAndCheckArticle")

# Linear finish: proofread -> ask for language -> translate.
graph_builder.add_edge("CompileAndCheckArticle", "GetUserLanguageChoice")
graph_builder.add_edge("GetUserLanguageChoice", "TranslateArticle")
graph_builder.set_finish_point("TranslateArticle")

# Turn the builder (nodes + edges defined above in this cell) into a runnable graph.
graph = graph_builder.compile()
# Example invocation: the topic is passed as the content of a single
# HumanMessage; receive_topic reads it from the end of "messages".
inputs = {"messages": [HumanMessage(content="AI in india")]}
# NOTE(review): running the graph presumably performs live search/LLM calls
# and asks for a language through human_input — confirm before re-running.
result = graph.invoke(inputs)



In [None]:
# Show the proofread article that compile_and_check_article stored under
# "final_article" in the result of the graph run.
print(result['final_article'])

In [None]:
# !pip install pyppeteer
# # You might also need to install a browser executable if pyppeteer hasn't done it.
# # Run this command in your terminal if pyppeteer can't find a browser:
# # pyppeteer-install

In [None]:
from IPython.display import Image, display
# display(Image(graph.get_graph().draw_mermaid_png()))

# # Import CurveStyle and MermaidDrawMethod from langgraph.graph.mermaid
# from langgraph.graph.mermaid import CurveStyle, MermaidDrawMethod, NodeStyles
from langchain_core.runnables.graph_mermaid import draw_mermaid_png

# Draw the compiled graph as a Mermaid PNG and show it inline.
display(Image(graph.get_graph().draw_mermaid_png()))