In [2]:
import json
import re
import os
from typing import Dict, Any, Union

from langchain_openai import ChatOpenAI
from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain_core.prompts import ChatPromptTemplate
from langchain.tools import Tool
from langchain_community.tools.tavily_search import TavilySearchResults
from dotenv import load_dotenv
import textstat

load_dotenv()

# ==============================================================================
#  TOOL LOGIC FUNCTIONS
# ==============================================================================

def check_length_and_structure(text: str) -> str:
    """Summarise the size and layout of ``text`` and score its structure.

    Words are whitespace-separated tokens, sentences are the non-empty pieces
    between runs of '.', '!' or '?', and paragraphs are the non-empty pieces
    between blank lines. The score starts at 10, loses points for each rule
    that fires, and is clamped to a minimum of 1.

    Args:
        text: The content to analyse.

    Returns:
        A JSON string with ``word_count``, ``sentences``, ``paragraphs``,
        ``issues``, ``score`` and ``score_explanation``.
    """
    word_count = len(text.split())
    sentence_count = sum(1 for piece in re.split(r'[.!?]+', text) if piece.strip())
    paragraph_count = sum(1 for piece in text.split('\n\n') if piece.strip())

    # Each rule is (triggered?, issue message, points deducted), checked in order.
    rules = (
        (
            word_count < 15,
            f"Content is very short with only {word_count} words.",
            3,
        ),
        (
            word_count > 150 and paragraph_count == 1,
            "Long content is not broken into paragraphs, making it hard to read.",
            2,
        ),
        (
            sentence_count < 2 and word_count > 20,
            "Content consists of a single long sentence; consider breaking it up.",
            2,
        ),
    )
    issues = [message for triggered, message, _ in rules if triggered]
    deducted = sum(points for triggered, _, points in rules if triggered)
    final_score = max(1, 10 - deducted)

    report = {
        "word_count": word_count,
        "sentences": sentence_count,
        "paragraphs": paragraph_count,
        "issues": issues,
        "score": final_score,
        "score_explanation": f"Structure score: {final_score}/10. Based on {word_count} words, {sentence_count} sentences, {paragraph_count} paragraphs.",
    }
    return json.dumps(report)

def check_grammar_with_llm(text: str) -> str:
    """Ask an OpenAI chat model to proofread ``text``.

    Wraps the text in a proofreading prompt, sends it to a ``gpt-4o-mini``
    chat model at temperature 0 and returns the reply content as-is. The
    prompt asks for a JSON object (``errors``, ``score``,
    ``score_explanation``), but the reply is not parsed or validated here.

    Args:
        text: The content to proofread.

    Returns:
        The raw content of the model's reply.
    """
    print("üß† Using LLM to check grammar...")
    proofreader = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    instructions = f"""
    You are an expert proofreader. Analyze the following text for grammar and spelling mistakes.
    Respond with a JSON object with:
    - "errors": ["list of concise error descriptions"]
    - "score": integer from 1-10 (10 = perfect, deduct points for each error)
    - "score_explanation": brief explanation of why this score was given
    
    Text: "{text}"
    """
    return proofreader.invoke(instructions).content

def check_readability_with_textstat(text: str) -> str:
    """Rate readability from the Flesch Reading Ease value computed by textstat.

    Blank or whitespace-only text short-circuits to a fixed "unassessable"
    result. Otherwise the Flesch value is mapped to a level label and a
    1-10 readability score: below 30, below 60, below 80, and everything else.

    Args:
        text: The content to rate.

    Returns:
        A JSON string. For non-empty text it holds
        ``flesch_reading_ease_score``, ``level``, ``readability_score`` and
        ``score_explanation``.
    """
    if not text.strip():
        unassessable = {
            "score": 0,
            "level": "unassessable",
            "readability_score": 1,
            "score_explanation": "Cannot assess readability of empty text.",
        }
        return json.dumps(unassessable)

    flesch_score = textstat.flesch_reading_ease(text)

    # (exclusive upper bound, level label, readability score); first match wins.
    bands = (
        (30, "Very Confusing (College Graduate)", 3),
        (60, "Difficult", 6),
        (80, "Fairly Easy", 8),
    )
    level, readability_score = "Very Easy", 10
    for upper_bound, band_level, band_score in bands:
        if flesch_score < upper_bound:
            level, readability_score = band_level, band_score
            break

    result = {
        "flesch_reading_ease_score": flesch_score,
        "level": level,
        "readability_score": readability_score,
        "score_explanation": f"Readability score: {readability_score}/10. Flesch score of {flesch_score:.1f} indicates {level.lower()} reading level.",
    }
    return json.dumps(result)

def check_professionalism(text: str) -> str:
    """Flag profanity, shouting in capitals and runs of symbols in ``text``.

    Three independent checks each add an issue and a penalty:
    a small fixed list of profane words (-5), an all-caps word of four or
    more letters combined with more than 30% uppercase characters overall
    (-3), and four or more consecutive characters from ``!?@#$%^&*()`` (-2).
    The score starts at 10 and is clamped to a minimum of 1.

    Args:
        text: The content to inspect.

    Returns:
        A JSON string with ``issues``, ``score`` and ``score_explanation``.
    """
    issues = []
    penalty = 0

    if re.search(r'\b(fuck|shit|damn|bitch|asshole)\b', text, re.IGNORECASE) is not None:
        issues.append("Inappropriate or profane language was found.")
        penalty += 5

    # The ratio is only computed once an all-caps word exists, so text is non-empty here.
    if re.search(r'\b[A-Z]{4,}\b', text):
        uppercase_share = sum(1 for ch in text if ch.isupper()) / len(text)
        if uppercase_share > 0.3:
            issues.append("Excessive capitalization is used, which appears unprofessional.")
            penalty += 3

    if re.search(r'[!?@#$%^&*()]{4,}', text) is not None:
        issues.append("Excessive punctuation or symbols are used.")
        penalty += 2

    final_score = max(1, 10 - penalty)
    return json.dumps({
        "issues": issues,
        "score": final_score,
        "score_explanation": f"Professionalism score: {final_score}/10. {len(issues)} professional issues detected."
    })

def check_redundancy(text: str) -> str:
    """Score ``text`` by how many of its sentences are exact repeats.

    Sentences are the non-empty pieces between runs of '.', '!' or '?',
    compared case-insensitively after trimming. Only texts with more than two
    sentences are evaluated; when fewer than half of them are unique, an
    issue is recorded and the score becomes ``int(unique_ratio * 10)``
    (minimum 1). Otherwise the score stays at 10.

    Args:
        text: The content to inspect.

    Returns:
        A JSON string with ``redundancy_issues``, ``score`` and
        ``score_explanation``.
    """
    normalized = [piece.lower().strip() for piece in re.split(r'[.!?]+', text) if piece.strip()]
    total = len(normalized)
    distinct = len(set(normalized))

    issues = []
    score = 10
    if total > 2 and distinct / total < 0.5:
        unique_ratio = distinct / total
        repeated_pct = 100 - unique_ratio*100
        issues.append(f"High sentence redundancy detected. {repeated_pct:.0f}% of sentences are repetitive.")
        score = max(1, int(unique_ratio * 10))

    # max(1, total) keeps the reported ratio defined for text without sentences.
    reported_ratio = distinct / max(1, total)
    return json.dumps({
        "redundancy_issues": issues,
        "score": score,
        "score_explanation": f"Redundancy score: {score}/10. Unique sentence ratio: {reported_ratio:.2f}"
    })

def check_itext_payload_faithfulness(payload_str: str) -> str:
    """Ask an LLM to judge an iText payload's consistency and faithfulness.

    The payload is parsed as JSON and read as
    ``{"message": ..., "data": [{"data": {"content", "simplified",
    "elaborated"}}]}``; every missing key falls back to an empty value.
    The four extracted strings are placed into an evaluation prompt that is
    sent to a ``gpt-4o-mini`` chat model at temperature 0.

    Args:
        payload_str: The iText payload serialized as a JSON string.

    Returns:
        The raw content of the model's reply (the prompt requests a JSON
        object, but the reply is not validated here), or a JSON string of the
        form ``{"error": "Invalid iText payload structure: ..."}`` when the
        input is not valid JSON or does not have the expected shape.
    """
    print("üß† Using LLM to analyze iText payload...")
    try:
        payload = json.loads(payload_str)
        message = payload.get("message", "")
        content_data = payload.get("data", [{}])[0].get("data", {})
        original = content_data.get("content", "")
        simplified = content_data.get("simplified", "")
        elaborated = content_data.get("elaborated", "")
    except (json.JSONDecodeError, IndexError, KeyError, AttributeError, TypeError) as e:
        # AttributeError/TypeError cover valid JSON of the wrong shape
        # (e.g. a top-level list or string, or "data": null), which would
        # otherwise escape as an unhandled exception instead of an error report.
        return json.dumps({"error": f"Invalid iText payload structure: {e}"})

    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    prompt = f"""
    You are an expert iText payload evaluator. Analyze the following payload:
    Message: "{message}"
    Original Content: "{original}"
    Simplified Content: "{simplified}"
    Elaborated Content: "{elaborated}"

    Evaluate two things:
    1.  **Consistency**: Does the content seem to match the message (e.g., a "success" message with valid content)?
    2.  **Faithfulness & Relevance**: Compare the three text versions. Do the simplified/elaborated versions maintain the original meaning without adding incorrect info?

    Return ONLY a JSON object with your findings, like this:
    {{
        "consistency_check": "<Brief summary of consistency>",
        "faithfulness_score": <int 1-10>,
        "relevance_score": <int 1-10>,
        "comparison_summary": "<Brief summary of how the versions differ>",
        "score_explanation": "<Detailed explanation of why these scores were given>"
    }}
    """
    response = llm.invoke(prompt)
    return response.content

# ==============================================================================
#  AGENT CLASS
# ==============================================================================

class ContentQualityAgent:
    """OpenAI-functions agent that turns text or an iText payload into a JSON quality report.

    The agent is given the module's check functions plus a Tavily search tool
    and a system prompt describing the report structure it must return.
    """

    def __init__(self, model="gpt-4o-mini", temperature=0):
        """Create the chat model and build the agent executor."""
        self.llm = ChatOpenAI(model=model, temperature=temperature)
        self.setup_agent()

    def setup_agent(self):
        """Register the tools, build the prompt and store ``self.agent_executor``."""
        # (tool name, backing function, description shown to the model)
        function_tool_specs = [
            ("iTextPayloadFaithfulnessCheck", check_itext_payload_faithfulness, "Use this ONLY for a JSON input representing an iText payload. It checks faithfulness between content versions."),
            ("GrammarAndSpellingCheck", check_grammar_with_llm, "Use to check a piece of text for spelling and grammatical errors."),
            ("ReadabilityCheck", check_readability_with_textstat, "Use to get the readability score of a piece of text."),
            ("ProfessionalismCheck", check_professionalism, "Use to check a piece of text for unprofessional language."),
            ("LengthAndStructureCheck", check_length_and_structure, "Use to get word count and structure of a piece of text."),
            ("RedundancyCheck", check_redundancy, "Use to check a piece of text for repetitive sentences."),
        ]
        toolbox = [
            Tool(name=tool_name, func=tool_func, description=tool_description)
            for tool_name, tool_func, tool_description in function_tool_specs
        ]
        toolbox.append(
            TavilySearchResults(name="FactCheckSearch", max_results=3, description="Use to verify factual claims in a piece of text.")
        )

        # Doubled braces are literal braces in the prompt template.
        system_message = """You are an expert Quality Assurance assistant. Your goal is to provide a complete quality report.

            You will receive one of two types of input:
            1.  A plain text paragraph.
            2.  A JSON string representing an "iText payload".

            **Your Plan:**
            - If the input is an iText payload (JSON), you MUST use the `iTextPayloadFaithfulnessCheck` tool on the entire JSON. You should ALSO run standard checks (grammar, professionalism, etc.) on the 'content' field within the payload.
            - If the input is plain text, just run the standard checks on it.
            
            After executing your plan, compile all results into the final JSON report.

            **FINAL ANSWER FORMATTING INSTRUCTIONS:**
            Your final answer MUST be a single JSON object with the specified structure.
            {{
                "overall_score": <An integer score from 1-10>,
                "category_scores": {{
                    "grammar_and_spelling": <Score 1-10>,
                    "readability": <Score 1-10>,
                    "professionalism_and_tone": <Score 1-10>,
                    "factual_accuracy": <Score 1-10. Default to 10 if no claims to check.>,
                    "redundancy": <Score 1-10>,
                    "faithfulness": <Score 1-10. Only applies to iText. Default to 10 for plain text.>
                }},
                "score_explanations": {{
                    "grammar_and_spelling": "<Explanation for grammar score>",
                    "readability": "<Explanation for readability score>",
                    "professionalism_and_tone": "<Explanation for professionalism score>",
                    "factual_accuracy": "<Explanation for factual accuracy score>",
                    "redundancy": "<Explanation for redundancy score>",
                    "faithfulness": "<Explanation for faithfulness score>"
                }},
                "summary": "<A natural language summary of key issues and suggestions for improvement.>"
            }}
            """
        chat_prompt = ChatPromptTemplate.from_messages([
            ("system", system_message),
            ("human", "{input}"),
            ("placeholder", "{agent_scratchpad}"),
        ])

        functions_agent = create_openai_functions_agent(self.llm, toolbox, chat_prompt)
        self.agent_executor = AgentExecutor(agent=functions_agent, tools=toolbox, verbose=True)

    async def validate_async(self, input_data: Union[str, Dict[str, Any]]) -> Dict[str, Any]:
        """Run the agent on a text string or an iText payload dict.

        A dict is serialized to indented JSON before being embedded in the
        request. The first ``{`` through the last ``}`` of the agent output is
        parsed as the report.

        Returns:
            The parsed report, or ``{"error": ..., "raw_output": ...}`` when
            the output has no ``output`` key, contains no JSON object, or the
            JSON fails to parse.
        """
        if isinstance(input_data, dict):
            # An iText payload: hand it to the agent as a JSON string.
            subject, body = "the following iText payload", json.dumps(input_data, indent=2)
        else:
            # A plain paragraph is passed through unchanged.
            subject, body = "the following text", input_data

        request = f"Please provide a comprehensive quality analysis of {subject}:\n---\n{body}\n---"

        outcome = {}
        try:
            outcome = await self.agent_executor.ainvoke({"input": request})
            # The model may wrap the JSON in extra prose, so search for it.
            found = re.search(r'\{.*\}', outcome['output'], re.DOTALL)
            if found is None:
                raise json.JSONDecodeError("No JSON object found in agent output.", outcome['output'], 0)
            return json.loads(found.group(0))
        except (json.JSONDecodeError, KeyError) as e:
            print(f"‚ùå Error during agent execution or parsing: {e}")
            return {"error": "Failed to generate a valid report.", "raw_output": outcome.get('output', 'No output was generated.')}

# ==============================================================================
#  MAIN EXECUTION BLOCK
# ==============================================================================
async def main():
    """Run the agent once on a sample iText payload and print the report.

    Returns early with an error message when either ``OPENAI_API_KEY`` or
    ``TAVILY_API_KEY`` is missing from the environment.
    """
    required_keys = ("OPENAI_API_KEY", "TAVILY_API_KEY")
    if not all(os.getenv(key) for key in required_keys):
        print("‚ùå Error: API keys must be set in your .env file.")
        return

    print("üöÄ Initializing Content Quality Agent...")
    validator = ContentQualityAgent()

    # --- CASE 2: iText Payload ---
    content_versions = {
        "content": "The sun is a star at the center of the Solar System. It is a nearly perfect ball of hot plasma. The Sun's diameter is about 109 times that of Earth.",
        "simplified": "The sun is a big star in the middle of our solar system.",
        "elaborated": "The Sun, a G-type main-sequence star (G2V), is the focal point of our Solar System. Comprising over 99.8% of the system's mass, it is an immense sphere of incandescent plasma, with a diameter of approximately 1.39 million kilometers, roughly 109 times that of our planet, which is a big lie.",
    }
    itext_payload = {
        "message": "Successfully generated data science",
        "data": [{"type": "iText", "data": content_versions}],
    }

    print("\n\nüîç Running analysis on iText Payload...")
    print("=" * 60)
    print_report(await validator.validate_async(itext_payload))


def print_report(report: Dict[str, Any]):
    """Print an agent report to stdout between two divider lines.

    A report containing an ``error`` key is shown as the error message plus
    its ``raw_output``. Any other report is shown as the overall score, one
    entry per item in ``category_scores`` (with the matching text from
    ``score_explanations`` when present) and the summary.
    """
    divider = "-" * 60
    print("\n‚úÖ FINAL AGENT REPORT:")
    print(divider)

    if "error" in report:
        print(f"An error occurred: {report['error']}")
        print(f"Raw Output: {report.get('raw_output')}")
        print(divider)
        return

    print(f"üèÜ Overall Quality Score: {report.get('overall_score', 'N/A')} / 10")
    print("\n--- Category Breakdown ---")

    explanations = report.get('score_explanations', {})
    for key, value in report.get('category_scores', {}).items():
        # "grammar_and_spelling" is displayed as "Grammar And Spelling".
        label = key.replace('_', ' ').title()
        reason = explanations.get(key, "No explanation provided.")
        print(f"   - {label}: {value}/10")
        print(f"     ‚îî‚îÄ {reason}")

    print("\n--- Summary & Suggestions ---")
    print(report.get('summary', 'No summary provided.'))
    print(divider)

# Run the main async function.
# NOTE: a bare top-level `await` is only accepted by interpreters with
# top-level-await support (e.g. an IPython/Jupyter cell); in a plain .py
# module this statement is a SyntaxError.
if __name__ == "__main__":
    await main()


üöÄ Initializing Content Quality Agent...


üîç Running analysis on iText Payload...


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `iTextPayloadFaithfulnessCheck` with `{"message":"Successfully generated data science","data":[{"type":"iText","data":{"content":"The sun is a star at the center of the Solar System. It is a nearly perfect ball of hot plasma. The Sun's diameter is about 109 times that of Earth.","simplified":"The sun is a big star in the middle of our solar system.","elaborated":"The Sun, a G-type main-sequence star (G2V), is the focal point of our Solar System. Comprising over 99.8% of the system's mass, it is an immense sphere of incandescent plasma, with a diameter of approximately 1.39 million kilometers, roughly 109 times that of our planet, which is a big lie."}}]}`


[0müß† Using LLM to analyze iText payload...
[36;1m[1;3m{
    "consistency_check": "The message indicates successful data generation, which aligns with the provided conten

In [None]:
import json
import re

from better_profanity import profanity
import os
from typing import Dict, Any, Union

from langchain_openai import ChatOpenAI
from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain_core.prompts import ChatPromptTemplate
from langchain.tools import Tool
from langchain_community.tools.tavily_search import TavilySearchResults
from dotenv import load_dotenv
import textstat
import asyncio # Added for asyncio.run

load_dotenv()

# ==============================================================================
#  TOOL LOGIC FUNCTIONS
# ==============================================================================

def check_length_and_structure(text: str) -> str:
    """Count words, sentences and paragraphs in ``text`` and score its structure.

    Words are whitespace-separated tokens, sentences are the non-empty pieces
    between runs of '.', '!' or '?', and paragraphs are the non-empty pieces
    between blank lines. Starting from 10, points are deducted for very short
    text (-3), long text in a single paragraph (-2) and long text in a single
    sentence (-2); the result never goes below 1.

    Args:
        text: The content to analyse.

    Returns:
        A JSON string with ``word_count``, ``sentences``, ``paragraphs``,
        ``issues``, ``score`` and ``score_explanation``.
    """
    n_words = len(text.split())
    n_sentences = len([part for part in re.split(r'[.!?]+', text) if part.strip()])
    n_paragraphs = len([part for part in text.split('\n\n') if part.strip()])

    findings = []  # (issue message, points deducted)
    if n_words < 15:
        findings.append((f"Content is very short with only {n_words} words.", 3))
    if n_paragraphs == 1 and n_words > 150:
        findings.append(("Long content is not broken into paragraphs, making it hard to read.", 2))
    if n_words > 20 and n_sentences < 2:
        findings.append(("Content consists of a single long sentence; consider breaking it up.", 2))

    clamped = max(1, 10 - sum(points for _, points in findings))
    return json.dumps({
        "word_count": n_words,
        "sentences": n_sentences,
        "paragraphs": n_paragraphs,
        "issues": [message for message, _ in findings],
        "score": clamped,
        "score_explanation": f"Structure score: {clamped}/10. Based on {n_words} words, {n_sentences} sentences, {n_paragraphs} paragraphs."
    })

def check_grammar_with_llm(text: str) -> str:
    """Ask an OpenAI chat model for mechanical grammar and spelling errors in ``text``.

    The prompt restricts the model to objective errors and tells it to ignore
    tone, style and profanity. It is sent to a ``gpt-4o-mini`` chat model at
    temperature 0 and the reply content is returned as-is; the requested JSON
    (``errors``, ``score``, ``score_explanation``) is not parsed or validated
    here.

    Args:
        text: The content to check.

    Returns:
        The raw content of the model's reply.
    """
    print("üß† Using LLM to check grammar...")
    grammar_model = ChatOpenAI(model="gpt-4o-mini", temperature=0)

    # Narrowly scoped prompt: mechanical errors only.
    instructions = f"""
    You are a highly focused grammar and spelling checker. Your *only* job is to find objective, mechanical errors.

    -   **LOOK FOR:** Spelling mistakes, incorrect punctuation, subject-verb agreement, run-on sentences, and incorrect word usage (e.g., 'their' vs 'they're').
    -   **YOU MUST IGNORE:** The text's tone, style, opinions, or any profanity. Another tool is responsible for checking professionalism.

    Analyze the text below *strictly* for mechanical grammar and spelling errors.
    If there are no errors, return an empty list and a score of 10.
    
    Respond with a JSON object with:
    -   "errors": ["list of concise error descriptions (e.g., 'Spelling error: "wrogn" should be "wrong"')"]
    -   "score": integer from 1-10 (10 = perfect, deduct points *only* for each *mechanical* error)
    -   "score_explanation": brief explanation of why this score was given, based *only* on grammar/spelling.
    
    Text: "{text}"
    """
    return grammar_model.invoke(instructions).content


def check_professionalism_with_library(text: str) -> str:
    """Flag profanity, shouting in capitals and runs of symbols in ``text``.

    Profanity is detected with ``profanity.contains_profanity`` from the
    better-profanity package (-5). The remaining checks are regex based: an
    all-caps word of four or more letters combined with more than 30%
    uppercase characters overall (-3), and four or more consecutive
    characters from ``!?@#$%^&*()`` (-2). The score starts at 10 and is
    clamped to a minimum of 1.

    Args:
        text: The content to inspect.

    Returns:
        A JSON string with ``issues``, ``score`` and ``score_explanation``.
    """
    # The library's default word list is used; a custom list could be loaded
    # beforehand with profanity.load_censor_words().
    issues = []
    penalty = 0

    if profanity.contains_profanity(text):
        issues.append("Inappropriate or profane language was found.")
        penalty += 5

    # The ratio is only computed once an all-caps word exists, so text is non-empty here.
    if re.search(r'\b[A-Z]{4,}\b', text):
        uppercase_share = sum(1 for ch in text if ch.isupper()) / len(text)
        if uppercase_share > 0.3:
            issues.append("Excessive capitalization is used, which appears unprofessional.")
            penalty += 3

    if re.search(r'[!?@#$%^&*()]{4,}', text) is not None:
        issues.append("Excessive punctuation or symbols are used.")
        penalty += 2

    final_score = max(1, 10 - penalty)
    return json.dumps({
        "issues": issues,
        "score": final_score,
        "score_explanation": f"Professionalism score: {final_score}/10. {len(issues)} professional issues detected."
    })

def check_redundancy(text: str) -> str:
    """Score ``text`` by the share of exactly repeated sentences.

    Sentences are the non-empty pieces between runs of '.', '!' or '?',
    lower-cased and trimmed before comparison. Texts with more than two
    sentences are penalised when fewer than half of the sentences are unique:
    the score becomes ``int(unique_ratio * 10)`` with a minimum of 1.
    In every other case the score is 10.

    Args:
        text: The content to inspect.

    Returns:
        A JSON string with ``redundancy_issues``, ``score`` and
        ``score_explanation``.
    """
    cleaned = [chunk.lower().strip() for chunk in re.split(r'[.!?]+', text) if chunk.strip()]
    sentence_total = len(cleaned)
    unique_total = len(set(cleaned))

    findings = []
    result_score = 10
    if sentence_total > 2:
        share_unique = unique_total / sentence_total
        if share_unique < 0.5:
            share_repeated = 100 - share_unique*100
            findings.append(f"High sentence redundancy detected. {share_repeated:.0f}% of sentences are repetitive.")
            result_score = max(1, int(share_unique * 10))

    # Guard the divisor so text without any sentence still reports a ratio.
    shown_ratio = unique_total / max(1, sentence_total)
    return json.dumps({
        "redundancy_issues": findings,
        "score": result_score,
        "score_explanation": f"Redundancy score: {result_score}/10. Unique sentence ratio: {shown_ratio:.2f}"
    })

# ==============================================================================
#  AGENT CLASS
# ==============================================================================

class ContentQualityAgent:
    """OpenAI-functions agent that turns a plain-text input into a JSON quality report.

    The agent is given the check functions plus a Tavily search tool and a
    system prompt describing the checks to run and the report structure.
    """

    def __init__(self, model="gpt-4o-mini", temperature=0):
        """Create the chat model and build the agent executor."""
        self.llm = ChatOpenAI(model=model, temperature=temperature)
        self.setup_agent()

    def setup_agent(self):
        """Register the tools, build the prompt and store ``self.agent_executor``."""
        # NOTE(review): check_readability_with_textstat is not defined in this
        # cell; presumably it comes from an earlier cell - confirm before
        # running this cell on its own.
        tools = [
            Tool(name="GrammarAndSpellingCheck", func=check_grammar_with_llm, description="Use to check a piece of text for spelling and grammatical errors."),
            Tool(name="ReadabilityCheck", func=check_readability_with_textstat, description="Use to get the readability score of a piece of text."),
            # Use the library-backed checker defined alongside this class; the
            # tool previously pointed at check_professionalism, which this
            # cell does not define, leaving the new checker unused.
            Tool(name="ProfessionalismCheck", func=check_professionalism_with_library, description="Use to check a piece of text for unprofessional language."),
            Tool(name="LengthAndStructureCheck", func=check_length_and_structure, description="Use to get word count and structure of a piece of text."),
            Tool(name="RedundancyCheck", func=check_redundancy, description="Use to check a piece of text for repetitive sentences."),
            TavilySearchResults(name="FactCheckSearch", max_results=3, description="Use to verify factual claims in a piece of text.")
        ]

        # Doubled braces are literal braces in the prompt template.
        prompt = ChatPromptTemplate.from_messages([
            ("system", """You are an expert Quality Assurance assistant. Your goal is to provide a complete quality report for a given piece of plain text.

            **Your Plan:**
            - You will receive a plain text input.
            - You must run a comprehensive set of checks on this text:
                1.  GrammarAndSpellingCheck
                2.  ReadabilityCheck
                3.  ProfessionalismCheck
                4.  LengthAndStructureCheck
                5.  RedundancyCheck
                6.  FactCheckSearch (use this if the text makes a specific factual claim, e.g., "The sun is 100 miles away").
            
            After executing your plan, compile all results into the final JSON report.

            **FINAL ANSWER FORMATTING INSTRUCTIONS:**
            Your final answer MUST be a single JSON object with the specified structure.
            {{
                "overall_score": <An integer score from 1-10, which is the average of all category scores>,
                "category_scores": {{
                    "grammar_and_spelling": <Score 1-10>,
                    "readability": <Score 1-10>,
                    "professionalism_and_tone": <Score 1-10>,
                    "factual_accuracy": <Score 1-10. Default to 10 if no claims to check.>,
                    "redundancy": <Score 1-10>
                }},
                "score_explanations": {{
                    "grammar_and_spelling": "<Explanation for grammar score>",
                    "readability": "<Explanation for readability score>",
                    "professionalism_and_tone": "<Explanation for professionalism score>",
                    "factual_accuracy": "<Explanation for factual accuracy score>",
                    "redundancy": "<Explanation for redundancy score>"
                }},
                "summary": "<A natural language summary of key issues and suggestions for improvement.>"
            }}
            """),
            ("human", "{input}"),
            ("placeholder", "{agent_scratchpad}"),
        ])

        agent = create_openai_functions_agent(self.llm, tools, prompt)
        self.agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

    async def validate_async(self, input_text: str) -> Dict[str, Any]:
        """Run the agent on a plain text string and parse its JSON report.

        The first ``{`` through the last ``}`` of the agent output is parsed
        as the report.

        Returns:
            The parsed report, or ``{"error": ..., "raw_output": ...}`` when
            the result has no ``output`` key, contains no JSON object, or the
            JSON fails to parse.
        """
        prompt = f"Please provide a comprehensive quality analysis of the following text:\n---\n{input_text}\n---"

        result = {}
        try:
            result = await self.agent_executor.ainvoke({"input": prompt})
            # The model may wrap the JSON in extra prose, so search for it.
            json_match = re.search(r'\{.*\}', result['output'], re.DOTALL)
            if json_match:
                return json.loads(json_match.group(0))
            raise json.JSONDecodeError("No JSON object found in agent output.", result['output'], 0)
        except (json.JSONDecodeError, KeyError) as e:
            print(f"‚ùå Error during agent execution or parsing: {e}")
            return {"error": "Failed to generate a valid report.", "raw_output": result.get('output', 'No output was generated.')}

# ==============================================================================
#  MAIN EXECUTION BLOCK
# ==============================================================================
async def main():
    """Run the agent once on a sample plain-text input and print the report.

    Returns early with an error message when either ``OPENAI_API_KEY`` or
    ``TAVILY_API_KEY`` is missing from the environment.
    """
    required_keys = ("OPENAI_API_KEY", "TAVILY_API_KEY")
    if not all(os.getenv(key) for key in required_keys):
        print("‚ùå Error: API keys must be set in your .env file.")
        return

    print("üöÄ Initializing Content Quality Agent...")
    validator = ContentQualityAgent()

    # --- NEW: Plain Text Input Example ---
    # Written as explicit pieces so the leading/trailing whitespace of the
    # sample is visible.
    plain_text_input = (
        "\n"
        "Recent studies from Harvard University have confirmed that drinking 10 cups of coffee daily will increase your IQ by 50 points and make you live 150 years longer. NASA has also discovered that the sun is actually made of liquid gold and will turn into a giant diamond in 2025. Additionally, scientists at MIT have proven that cats can speak 15 different languages but choose not to talk to humans because they think we're too stupid.    \n"
        " "
    )

    print("\n\nüîç Running analysis on plain text input...")
    print("=" * 60)
    print_report(await validator.validate_async(plain_text_input))


def print_report(report: Dict[str, Any]):
    """Write an agent report to stdout, framed by 60-dash divider lines.

    Reports carrying an ``error`` key are rendered as the error text followed
    by ``raw_output``. All other reports are rendered as the overall score,
    each ``category_scores`` entry with its ``score_explanations`` text (or a
    placeholder when missing), and finally the summary.
    """
    rule = "-" * 60
    print("\n‚úÖ FINAL AGENT REPORT:")
    print(rule)

    failed = "error" in report
    if failed:
        print(f"An error occurred: {report['error']}")
        print(f"Raw Output: {report.get('raw_output')}")
    if not failed:
        print(f"üèÜ Overall Quality Score: {report.get('overall_score', 'N/A')} / 10")
        print("\n--- Category Breakdown ---")

        scores_by_category = report.get('category_scores', {})
        notes_by_category = report.get('score_explanations', {})

        for name, points in scores_by_category.items():
            # "grammar_and_spelling" is displayed as "Grammar And Spelling".
            heading = name.replace('_', ' ').title()
            note = notes_by_category.get(name, "No explanation provided.")
            print(f"   - {heading}: {points}/10")
            print(f"     ‚îî‚îÄ {note}")

        print("\n--- Summary & Suggestions ---")
        print(report.get('summary', 'No summary provided.'))

    print(rule)

# Run the main async function
if __name__ == "__main__":
    # The coroutine is awaited directly at top level (asyncio.run() is not
    # called here). A bare top-level `await` is only accepted by interpreters
    # with top-level-await support (e.g. an IPython/Jupyter cell); in a plain
    # .py module this statement is a SyntaxError.
    await main()

üöÄ Initializing Content Quality Agent...


üîç Running analysis on plain text input...


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `GrammarAndSpellingCheck` with `Recent studies from Harvard University have confirmed that drinking 10 cups of coffee daily will increase your IQ by 50 points and make you live 150 years longer. NASA has also discovered that the sun is actually made of liquid gold and will turn into a giant diamond in 2025. Additionally, scientists at MIT have proven that cats can speak 15 different languages but choose not to talk to humans because they think we're too stupid.`


[0müß† Using LLM to check grammar...
[36;1m[1;3m{
    "errors": [],
    "score": 10,
    "score_explanation": "The text contains no mechanical grammar or spelling errors."
}[0m[32;1m[1;3m
Invoking: `ReadabilityCheck` with `Recent studies from Harvard University have confirmed that drinking 10 cups of coffee daily will increase your IQ by 50 points and make you

In [1]:
# Run this first if you haven't installed pymongo
!pip install pymongo






In [None]:
import json
import re
import os
import logging
from typing import Dict, Any, Union, Optional

# --- API Imports ---
from fastapi import FastAPI
from pydantic import BaseModel

# --- Your Existing Imports ---
from better_profanity import profanity
from langchain_openai import ChatOpenAI
from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain_core.prompts import ChatPromptTemplate
from langchain.tools import Tool
from langchain_community.tools.tavily_search import TavilySearchResults
from dotenv import load_dotenv
import textstat
import asyncio

load_dotenv()

# Setup professional logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# ==============================================================================
#  TOOL LOGIC FUNCTIONS (Unchanged, but using logging)
# ==============================================================================

def check_length_and_structure(text: str) -> str:
    """Count words, sentences and paragraphs in ``text`` and score its structure.

    Logs the input size, then counts whitespace-separated words, non-empty
    pieces between runs of '.', '!' or '?' (sentences) and non-empty pieces
    between blank lines (paragraphs). Starting from 10, points are deducted
    for very short text (-3), long text in a single paragraph (-2) and long
    text in a single sentence (-2); the result never goes below 1.

    Args:
        text: The content to analyse.

    Returns:
        A JSON string with ``word_count``, ``sentences``, ``paragraphs``,
        ``issues``, ``score`` and ``score_explanation``.
    """
    logging.info(f"Running LengthAndStructureCheck on text (approx {len(text)} chars)")

    word_count = len(text.split())
    sentence_count = sum(1 for piece in re.split(r'[.!?]+', text) if piece.strip())
    paragraph_count = sum(1 for piece in text.split('\n\n') if piece.strip())

    # Each rule is (triggered?, issue message, points deducted), checked in order.
    rules = (
        (
            word_count < 15,
            f"Content is very short with only {word_count} words.",
            3,
        ),
        (
            word_count > 150 and paragraph_count == 1,
            "Long content is not broken into paragraphs, making it hard to read.",
            2,
        ),
        (
            sentence_count < 2 and word_count > 20,
            "Content consists of a single long sentence; consider breaking it up.",
            2,
        ),
    )
    issues = [message for triggered, message, _ in rules if triggered]
    deducted = sum(points for triggered, _, points in rules if triggered)
    final_score = max(1, 10 - deducted)

    return json.dumps({
        "word_count": word_count,
        "sentences": sentence_count,
        "paragraphs": paragraph_count,
        "issues": issues,
        "score": final_score,
        "score_explanation": f"Structure score: {final_score}/10. Based on {word_count} words, {sentence_count} sentences, {paragraph_count} paragraphs."
    })

def check_grammar_with_llm(text: str) -> str:
    """Ask a chat model to list mechanical grammar and spelling errors in *text*.

    Returns:
        The raw content of the model's reply (the prompt asks for a JSON object
        with "errors", "score" and "score_explanation"). If the call raises,
        a JSON string with a score of 1 and a generic failure message instead.
    """
    logging.info("üß† Using LLM to check grammar...")
    grammar_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

    instructions = f"""
    You are a highly focused grammar and spelling checker. Your *only* job is to find objective, mechanical errors.
    -   **LOOK FOR:** Spelling mistakes, incorrect punctuation, subject-verb agreement, run-on sentences, and incorrect word usage (e.g., 'their' vs 'they're').
    -   **YOU MUST IGNORE:** The text's tone, style, opinions, or any profanity. Another tool is responsible for checking professionalism.
    Analyze the text below *strictly* for mechanical grammar and spelling errors.
    If there are no errors, return an empty list and a score of 10.
    Respond with a JSON object with:
    -   "errors": ["list of concise error descriptions (e.g., 'Spelling error: "wrogn" should be "wrong"')"]
    -   "score": integer from 1-10 (10 = perfect, deduct points *only* for each *mechanical* error)
    -   "score_explanation": brief explanation of why this score was given, based *only* on grammar/spelling.
    Text: "{text}"
    """

    # Prepared up front; only used when the model call below fails.
    failure_report = json.dumps({
        "errors": ["Grammar check failed to run."],
        "score": 1,
        "score_explanation": "Tool execution error.",
    })

    try:
        return grammar_llm.invoke(instructions).content
    except Exception as e:
        logging.error(f"Grammar LLM call failed: {e}")
        return failure_report


def check_professionalism_with_library(text: str) -> str:
    """Flag profanity, shouting capitals and symbol spam in *text*.

    The score starts at 10 and is reduced by 5 for profanity (as reported by
    ``profanity.contains_profanity``), 3 for excessive capitalization and 2 for
    runs of four or more punctuation/symbol characters, with a floor of 1.

    Returns:
        A JSON string with the keys "issues", "score" and "score_explanation".
    """
    logging.info("Running ProfessionalismCheck")
    issues = []
    deducted = 0

    if profanity.contains_profanity(text):
        issues.append("Inappropriate or profane language was found.")
        deducted += 5

    # The ratio is only computed once an all-caps word of 4+ letters has been
    # found, which also guarantees the text is non-empty before dividing.
    if re.search(r'\b[A-Z]{4,}\b', text):
        uppercase_share = sum(map(str.isupper, text)) / len(text)
        if uppercase_share > 0.3:
            issues.append("Excessive capitalization is used, which appears unprofessional.")
            deducted += 3

    if re.search(r'[!?@#$%^&*()]{4,}', text):
        issues.append("Excessive punctuation or symbols are used.")
        deducted += 2

    final_score = max(1, 10 - deducted)
    return json.dumps({
        "issues": issues,
        "score": final_score,
        "score_explanation": f"Professionalism score: {final_score}/10. {len(issues)} professional issues detected."
    })

def check_redundancy(text: str) -> str:
    """Measure how many sentences of *text* are exact repeats of each other.

    Sentences are compared case-insensitively after trimming whitespace. Texts
    with two sentences or fewer are never penalised. When fewer than half of
    the sentences are unique, the score becomes the unique ratio scaled to
    0-10 and truncated, with a floor of 1.

    Returns:
        A JSON string with the keys "redundancy_issues", "score" and
        "score_explanation".
    """
    logging.info("Running RedundancyCheck")

    normalized = [part.lower().strip() for part in re.split(r'[.!?]+', text) if part.strip()]
    total = len(normalized)
    # max(1, ...) keeps the ratio defined for text without any sentence.
    unique_ratio = len(set(normalized)) / max(1, total)

    issues = []
    score = 10
    if total > 2 and unique_ratio < 0.5:
        repeated_percentage = 100 - unique_ratio*100
        issues.append(f"High sentence redundancy detected. {repeated_percentage:.0f}% of sentences are repetitive.")
        score = max(1, int(unique_ratio * 10))

    return json.dumps({
        "redundancy_issues": issues,
        "score": score,
        "score_explanation": f"Redundancy score: {score}/10. Unique sentence ratio: {unique_ratio:.2f}"
    })

# ==============================================================================
#  AGENT CLASS (Unchanged)
# ==============================================================================

class ContentQualityAgent:
    """Agent that runs the quality-check tools on a text and compiles a JSON report."""

    def __init__(self, model="gpt-4o-mini", temperature=0):
        """Create the chat model and build the agent executor.

        Args:
            model: Model name passed to ``ChatOpenAI``.
            temperature: Sampling temperature passed to ``ChatOpenAI``.
        """
        self.llm = ChatOpenAI(model=model, temperature=temperature)
        self.setup_agent()

    def setup_agent(self):
        """Register the tools, build the prompt and store the executor on ``self.agent_executor``."""
        # NOTE(review): check_readability_with_textstat is not defined in the visible part
        # of this file; confirm it exists before this class is instantiated.
        tools = [
            Tool(name="GrammarAndSpellingCheck", func=check_grammar_with_llm, description="Use to check a piece of text for spelling and grammatical errors."),
            Tool(name="ReadabilityCheck", func=check_readability_with_textstat, description="Use to get the readability score of a piece of text."),
            Tool(name="ProfessionalismCheck", func=check_professionalism_with_library, description="Use to check a piece of text for unprofessional language."),
            Tool(name="LengthAndStructureCheck", func=check_length_and_structure, description="Use to get word count and structure of a piece of text."),
            Tool(name="RedundancyCheck", func=check_redundancy, description="Use to check a piece of text for repetitive sentences."),
            TavilySearchResults(name="FactCheckSearch", max_results=3, description="Use to verify factual claims in a piece of text.")
        ]
        
        # The literal braces of the JSON example below are doubled ({{ }}) so the template
        # does not treat them as input variables; only {input} and {agent_scratchpad} are.
        prompt = ChatPromptTemplate.from_messages([
            ("system", """You are an expert Quality Assurance assistant. Your goal is to provide a complete quality report for a given piece of plain text.

            **Your Plan:**
            - You will receive a plain text input.
            - You must run a comprehensive set of checks on this text:
                1.  GrammarAndSpellingCheck
                2.  ReadabilityCheck
                3.  ProfessionalismCheck
                4.  LengthAndStructureCheck
                5.  RedundancyCheck
                6.  FactCheckSearch (use this if the text makes a specific factual claim, e.g., "The sun is 100 miles away").
            
            After executing your plan, compile all results into the final JSON report.

            **FINAL ANSWER FORMATTING INSTRUCTIONS:**
            Your final answer MUST be a single JSON object with the specified structure.
            {{
                "overall_score": <An integer score from 1-10, which is the average of all category scores>,
                "category_scores": {{
                    "grammar_and_spelling": <Score 1-10>,
                    "readability": <Score 1-10>,
                    "professionalism_and_tone": <Score 1-10>,
                    "factual_accuracy": <Score 1-10. Default to 10 if no claims to check.>,
                    "redundancy": <Score 1-10>
                }},
                "score_explanations": {{
                    "grammar_and_spelling": "<Explanation for grammar score>",
                    "readability": "<Explanation for readability score>",
                    "professionalism_and_tone": "<Explanation for professionalism score>",
                    "factual_accuracy": "<Explanation for factual accuracy score>",
                    "redundancy": "<Explanation for redundancy score>"
                }},
                "summary": "<A natural language summary of key issues and suggestions for improvement.>"
            }}
            """),
            ("human", "{input}"),
            ("placeholder", "{agent_scratchpad}"),
        ])
        
        agent = create_openai_functions_agent(self.llm, tools, prompt)
        self.agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

    async def validate_async(self, input_text: str) -> Dict[str, Any]:
        """Runs the agent asynchronously on a plain text string.

        Args:
            input_text: The text to analyse; it is wrapped in a short instruction
                before being sent to the agent.

        Returns:
            The JSON object parsed from the agent's output, or a dict with
            "error" and "raw_output" keys when no parseable JSON object is found
            or a KeyError is raised. Other exception types are not caught here.
        """
        
        prompt = f"Please provide a comprehensive quality analysis of the following text:\n---\n{input_text}\n---"
        
        # Bound before the try so the except branch can call result.get() even
        # when ainvoke itself raised.
        result = {} 
        try:
            result = await self.agent_executor.ainvoke({"input": prompt})
            # Greedy match with DOTALL: spans from the first '{' to the last '}' of the output.
            json_match = re.search(r'\{.*\}', result['output'], re.DOTALL)
            if json_match:
                return json.loads(json_match.group(0))
            else:
                # Raised so the missing-JSON case shares the handler below.
                raise json.JSONDecodeError("No JSON object found in agent output.", result['output'], 0)
        except (json.JSONDecodeError, KeyError) as e:
            logging.error(f"‚ùå Error during agent execution or parsing: {e}")
            return {"error": "Failed to generate a valid report.", "raw_output": result.get('output', 'No output was generated.')}


# ==============================================================================
#  FASTAPI APP SETUP
# ==============================================================================

# 1. Define the input model
class ValidationRequest(BaseModel):
    """Defines the expected JSON input for our API."""
    # Both fields default to None so a client may send either one; the model
    # itself does not require that at least one of them is present.
    # Use Optional to allow the user to send *either* one
    plain_text: Optional[str] = None
    # We can accept a complex dict for the iText payload
    # Its inner structure is not validated by this model.
    itext_payload: Optional[Dict[str, Any]] = None

# 2. Initialize the API and the Agent (globally, only once)
# Both objects are created when this module is executed, so the agent (and its
# ChatOpenAI client) is constructed once and shared by the endpoint below.
app = FastAPI(
    title="Content Quality Agent API",
    description="API for running comprehensive quality checks on text."
)

logging.info("üöÄ Initializing Content Quality Agent for the API...")
# Module-level instance used by the /validate endpoint.
validator = ContentQualityAgent()
logging.info("‚úÖ Agent Initialized. API is ready.")


# 3. Define the API Endpoint
@app.post("/validate", tags=["Validation"])
async def http_validate_content(request: ValidationRequest):
    """
    Run a comprehensive quality analysis on a piece of text.

    You must provide *either* a `plain_text` field or an `itext_payload` field.
    If both are sent, `itext_payload` takes precedence.

    Responses:
    - 200: the quality report produced by the agent.
    - 400: neither field was provided, or the iText payload has no
      `data[0].data.content` entry.
    - 500: the agent did not produce a valid report.
    """
    # Imported locally so this handler does not depend on an import elsewhere in the file.
    from fastapi.responses import JSONResponse

    # FastAPI does not interpret a `(body, status)` tuple the way Flask does: it
    # would be serialised as a JSON array and sent with HTTP 200. Error statuses
    # therefore have to be set explicitly through a response object.
    if request.itext_payload:
        logging.info("Received iText payload for validation.")
        # Only the 'content' field of the first data entry is analysed.
        try:
            input_to_agent = request.itext_payload["data"][0]["data"]["content"]
        except (KeyError, IndexError, TypeError):
            # TypeError covers payloads where an intermediate value is not a
            # list/dict (e.g. "data" is a string or null).
            logging.error("Invalid iText payload structure received.")
            return JSONResponse(
                status_code=400,
                content={"error": "Invalid iText payload structure. Could not find 'content' field."},
            )

    elif request.plain_text:
        logging.info("Received plain_text for validation.")
        input_to_agent = request.plain_text

    else:
        logging.warning("Validation request received without plain_text or itext_payload.")
        return JSONResponse(
            status_code=400,
            content={"error": "You must provide either 'plain_text' or 'itext_payload' in the request body."},
        )

    # Run the agent
    report = await validator.validate_async(input_to_agent)

    if "error" in report:
        # The agent signals failure with an "error" key; surface it as a server error.
        return JSONResponse(status_code=500, content=report)

    return report

# ==============================================================================
#  RUN THE SERVER
# ==============================================================================

if __name__ == "__main__":
    # Imported here so uvicorn is only needed when this file is run directly.
    import uvicorn
    print("Starting API server at http://127.0.0.1:8000")
    print("See API docs at http://127.0.0.1:8000/docs")
    # Serve the app on the loopback address, port 8000 (matches the URLs printed above).
    uvicorn.run(app, host="127.0.0.1", port=8000)

üöÄ Initializing Content Quality Agent...


üîç Running analysis on Text Paragraph...


[1m> Entering new AgentExecutor chain...[0m
‚ùå Error during agent execution or parsing: 'Input to ChatPromptTemplate is missing variables {\'\\n                "overall_score"\'}.  Expected: [\'\\n                "overall_score"\', \'input\'] Received: [\'input\', \'intermediate_steps\', \'agent_scratchpad\']\nNote: if you intended {\n                "overall_score"} to be part of the string and not a variable, please escape it with double curly braces like: \'{{\n                "overall_score"}}\'.\nFor troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/INVALID_PROMPT_INPUT '

‚úÖ FINAL AGENT REPORT:
------------------------------------------------------------
An error occurred: Failed to generate a valid report.
Raw Output: No output was generated.
------------------------------------------------------------


In [1]:
# NOTE(review): the recorded output of this cell is
# "ModuleNotFoundError: No module named 'pratice'" — confirm the module name
# (possibly a misspelling of "practice") before relying on this import.
from pratice import LearningContentFetcher

fetcher = LearningContentFetcher(
    connection_string="mongodb://localhost:27017/",
    database="cornerstone_curated",
    collection="bulk_generate_content_metadata"
)

content_id = "7c2d50e3-bd0e-44bf-b491-867d63be0b2d"

# try/finally guarantees close() runs even when the lookup or printing raises,
# so the fetcher is not left open after a failed call.
try:
    content = fetcher.get_learning_content_by_id(content_id)

    if content:
        print("‚úÖ Content fetched:")
        print(content)
    else:
        print("‚ùå No content found for ID:", content_id)
finally:
    fetcher.close()


ModuleNotFoundError: No module named 'pratice'

  from .autonotebook import tqdm as notebook_tqdm


üöÄ Initializing Content Quality Agent...


üîç Running analysis on Text Paragraph...


[1m> Entering new AgentExecutor chain...[0m
‚ùå Error during agent execution or parsing: 'Input to ChatPromptTemplate is missing variables {\'\\n                "overall_score"\'}.  Expected: [\'\\n                "overall_score"\', \'input\'] Received: [\'input\', \'intermediate_steps\', \'agent_scratchpad\']\nNote: if you intended {\n                "overall_score"} to be part of the string and not a variable, please escape it with double curly braces like: \'{{\n                "overall_score"}}\'.\nFor troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/INVALID_PROMPT_INPUT '

‚úÖ FINAL AGENT REPORT:
------------------------------------------------------------
An error occurred: Failed to generate a valid report.
Raw Output: No output was generated.
------------------------------------------------------------


  TavilySearchResults(name="FactCheckSearch", max_results=3, description="Use to verify factual claims in a piece of text.")


In [None]:
import json
import re
import os
from typing import Dict, Any, Union

from langchain_openai import ChatOpenAI
from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain_core.prompts import ChatPromptTemplate
from langchain.tools import Tool
from langchain_community.tools.tavily_search import TavilySearchResults
from dotenv import load_dotenv
import textstat

load_dotenv()

# ==============================================================================
#  TOOL LOGIC FUNCTIONS
# ==============================================================================

def check_length_and_structure(text: str) -> str:
    """Score a text's length, paragraphing and sentence length; return JSON.

    Words are whitespace-separated tokens, sentences are non-empty fragments
    between runs of '.', '!' or '?', and paragraphs are non-empty chunks
    separated by a blank line. The score starts at 10, loses points for each
    finding and never drops below 1.

    Returns:
        A JSON string with the keys "word_count", "sentences", "paragraphs",
        "avg_sentence_length", "issues", "score" and "score_explanation".
    """
    word_count = len(text.split())
    sentence_count = len([frag for frag in re.split(r'[.!?]+', text) if frag.strip()])
    paragraph_count = len([para for para in text.split('\n\n') if para.strip()])
    # max(1, ...) avoids dividing by zero for text without any sentence.
    avg_sentence_length = word_count / max(1, sentence_count)

    # (message, penalty) pairs, in the order they are detected.
    findings = []

    # Length: too short and too long are mutually exclusive.
    if word_count < 50:
        findings.append((f"Content is too short with only {word_count} words.", 3))
    elif word_count > 1000:
        findings.append((f"Content is very long with {word_count} words. Consider breaking into sections.", 1))

    # Structure
    if paragraph_count == 1 and word_count > 200:
        findings.append(("Long content should be broken into multiple paragraphs for better readability.", 2))
    if word_count > 50 and sentence_count < 3:
        findings.append(("Content has very few sentences; consider adding more detail.", 2))

    # Sentence length
    if avg_sentence_length > 25:
        findings.append(("Average sentence length is too long; consider shorter sentences.", 1))

    issues = [message for message, _ in findings]
    final_score = max(1, 10 - sum(penalty for _, penalty in findings))

    return json.dumps({
        "word_count": word_count,
        "sentences": sentence_count,
        "paragraphs": paragraph_count,
        "avg_sentence_length": round(avg_sentence_length, 1),
        "issues": issues,
        "score": final_score,
        "score_explanation": f"Length & Structure score: {final_score}/10. {word_count} words, {sentence_count} sentences, {paragraph_count} paragraphs."
    })

def check_formatting_and_presentation(text: str) -> str:
    """Check punctuation, capitalization, spacing and sentence endings of *text*.

    The score starts at 10 and is reduced per finding, with a floor of 1:
    runs of three or more '!'/'?' (-2), more than 30% uppercase characters
    (-3), three or more consecutive whitespace characters (-1), and more than
    20% of sentences lacking terminal punctuation (-1).

    Returns:
        A JSON string with the keys "formatting_issues", "caps_ratio", "score"
        and "score_explanation".
    """
    issues = []
    score = 10

    # Check for excessive punctuation
    if re.search(r'[!?]{3,}', text):
        issues.append("Excessive exclamation marks or question marks found.")
        score -= 2

    # Check for all caps (shouting); max(1, ...) keeps empty text from dividing by zero.
    caps_ratio = sum(1 for c in text if c.isupper()) / max(1, len(text))
    if caps_ratio > 0.3:
        issues.append("Excessive use of capital letters appears unprofessional.")
        score -= 3

    # Check for inconsistent spacing
    if re.search(r'\s{3,}', text):
        issues.append("Inconsistent spacing found (multiple spaces).")
        score -= 1

    # Check for proper sentence endings.
    # Sentences are extracted WITH their terminators; splitting on the
    # terminators would discard them and make every fragment look unterminated.
    # Paragraphs are handled separately so a paragraph that trails off without
    # punctuation is detected even when more text follows.
    segments = [
        segment.strip()
        for paragraph in re.split(r'\n\s*\n', text)
        for segment in re.findall(r'[^.!?]+[.!?]*', paragraph)
    ]
    # Ignore leftovers with no word characters (closing quotes, stray symbols).
    sentences = [segment for segment in segments if re.search(r'\w', segment)]
    unterminated = sum(
        1 for sentence in sentences if not sentence.endswith(('.', '!', '?'))
    )
    if unterminated > len(sentences) * 0.2:
        issues.append("Some sentences lack proper punctuation.")
        score -= 1

    final_score = max(1, score)
    return json.dumps({
        "formatting_issues": issues,
        "caps_ratio": round(caps_ratio, 3),
        "score": final_score,
        "score_explanation": f"Formatting score: {final_score}/10. {len(issues)} formatting issues detected."
    })

def check_profanity_and_appropriateness(text: str) -> str:
    """Scan *text* for listed profane, offensive or discriminatory words.

    Matching is case-insensitive and on whole words. Each category is
    reported at most once: offensive language costs 4 points and
    discriminatory language 3, starting from 10 with a floor of 1.

    Returns:
        A JSON string with the keys "profanity_issues", "score" and
        "score_explanation".
    """
    # Extended profanity list
    offensive_patterns = (
        r'\b(fuck|shit|damn|bitch|asshole|bastard|crap|hell)\b',
        r'\b(stupid|idiot|moron|retard)\b',  # Potentially offensive terms
    )
    discriminatory_patterns = (
        r'\b(hate|discrimination|racist|sexist)\b',
    )

    def matches_any(patterns) -> bool:
        """Return True as soon as one pattern is found in the text."""
        return any(re.search(pattern, text, re.IGNORECASE) for pattern in patterns)

    # (message, penalty) pairs, one per category that matched.
    findings = []
    if matches_any(offensive_patterns):
        findings.append(("Inappropriate or potentially offensive language detected.", 4))
    if matches_any(discriminatory_patterns):
        findings.append(("Potentially discriminatory language detected.", 3))

    issues = [message for message, _ in findings]
    final_score = max(1, 10 - sum(penalty for _, penalty in findings))

    return json.dumps({
        "profanity_issues": issues,
        "score": final_score,
        "score_explanation": f"Profanity score: {final_score}/10. {len(issues)} inappropriate content issues detected."
    })

def check_clarity_and_coherence(text: str) -> str:
    """Ask a chat model to rate *text* for clarity, coherence and organization.

    Returns:
        The raw content of the model's reply; the prompt requests a JSON object
        with "clarity_issues", "coherence_issues", "score" and
        "score_explanation". Errors from the model call are not handled here.
    """
    print("üß† Using LLM to check clarity and coherence...")
    analyst_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    # Doubled braces in the JSON example are literal braces inside the f-string.
    request = f"""
    You are an expert writing analyst. Evaluate the following text for clarity and coherence:
    
    Text: "{text}"
    
    Analyze:
    1. Clarity: Is the message clear and easy to understand?
    2. Coherence: Do ideas flow logically from one to another?
    3. Consistency: Is the writing style and tone consistent?
    4. Organization: Is the content well-organized?
    
    Respond with a JSON object:
    {{
        "clarity_issues": ["list of clarity problems"],
        "coherence_issues": ["list of logical flow problems"], 
        "score": <integer 1-10>,
        "score_explanation": "<detailed explanation of score>"
    }}
    """
    return analyst_llm.invoke(request).content

def check_grammar_and_language(text: str) -> str:
    """Ask a chat model to proofread *text* for grammar, spelling and word choice.

    Returns:
        The raw content of the model's reply; the prompt requests a JSON object
        with "grammar_errors", "spelling_errors", "language_issues", "score"
        and "score_explanation". Errors from the model call are not handled here.
    """
    print("üß† Using LLM to check grammar and language...")
    proofreader_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    # Doubled braces in the JSON example are literal braces inside the f-string.
    request = f"""
    You are an expert proofreader and grammar checker. Analyze the following text:
    
    Text: "{text}"
    
    Check for:
    1. Grammar mistakes
    2. Spelling errors
    3. Punctuation issues
    4. Word choice and vocabulary appropriateness
    5. Sentence structure problems
    
    Respond with a JSON object:
    {{
        "grammar_errors": ["list of grammar issues"],
        "spelling_errors": ["list of spelling mistakes"],
        "language_issues": ["list of language/vocabulary issues"],
        "score": <integer 1-10>,
        "score_explanation": "<detailed explanation of score>"
    }}
    """
    return proofreader_llm.invoke(request).content

def check_redundancy_and_repetition(text: str) -> str:
    """Analyze *text* for repeated sentences, overused words and filler words.

    The score starts at 10 and is reduced per finding, with a floor of 1:
    - more than three sentences with under 70% unique ones (-3);
    - in texts over 20 words, any word longer than four characters that makes
      up more than 5% of all words (-2);
    - filler words making up more than 3% of all words (-1).

    Words are lower-cased and stripped of surrounding punctuation before
    counting, so "really," and "really" are the same word.

    Returns:
        A JSON string with the keys "redundancy_issues",
        "unique_sentence_ratio", "filler_word_count", "score" and
        "score_explanation".
    """
    sentences = [s.lower().strip() for s in re.split(r'[.!?]+', text) if s.strip()]

    # Punctuation glued to a token ("very," / "towers.") would otherwise hide
    # filler words and split one word into several frequency buckets.
    edge_punctuation = '.,;:!?"\'()[]{}'
    words = [
        token
        for token in (raw.strip(edge_punctuation) for raw in text.lower().split())
        if token
    ]

    issues = []
    score = 10

    # max(1, ...) keeps the ratio defined for text without any sentence.
    unique_ratio = len(set(sentences)) / max(1, len(sentences))

    # Check sentence redundancy
    if len(sentences) > 3 and unique_ratio < 0.7:
        redundancy_percentage = (1 - unique_ratio) * 100
        issues.append(f"High sentence redundancy: {redundancy_percentage:.0f}% of sentences are repetitive.")
        score -= 3

    # Check word repetition
    if len(words) > 20:
        word_freq = {}
        for word in words:
            if len(word) > 4:  # Only check longer words
                word_freq[word] = word_freq.get(word, 0) + 1

        overused_words = [(word, count) for word, count in word_freq.items() if count > len(words) * 0.05]
        if overused_words:
            # Only the first three offenders are listed to keep the message short.
            issues.append(f"Overused words detected: {[f'{word}({count})' for word, count in overused_words[:3]]}")
            score -= 2

    # Check for filler words
    filler_words = {'very', 'really', 'quite', 'rather', 'pretty', 'somewhat', 'actually'}
    filler_count = sum(1 for word in words if word in filler_words)
    if filler_count > len(words) * 0.03:
        issues.append(f"Excessive use of filler words: {filler_count} instances.")
        score -= 1

    final_score = max(1, score)
    return json.dumps({
        "redundancy_issues": issues,
        "unique_sentence_ratio": round(unique_ratio, 2),
        "filler_word_count": filler_count,
        "score": final_score,
        "score_explanation": f"Redundancy score: {final_score}/10. Unique sentence ratio: {round(unique_ratio, 2)}"
    })

def check_hallucinations_and_accuracy(text: str) -> str:
    """Ask a chat model to flag suspicious, contradictory or unverifiable claims in *text*.

    Returns:
        The raw content of the model's reply; the prompt requests a JSON object
        with "suspicious_claims", "contradictions", "verification_needed",
        "score" and "score_explanation". Errors from the model call are not
        handled here.
    """
    print("üß† Using LLM to check for potential hallucinations...")
    fact_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    # Doubled braces in the JSON example are literal braces inside the f-string.
    request = f"""
    You are an expert fact-checker. Analyze the following text for potential hallucinations and inaccuracies:
    
    Text: "{text}"
    
    Look for:
    1. Factual claims that seem suspicious or incorrect
    2. Contradictory statements within the text
    3. Unrealistic or impossible scenarios
    4. Inconsistent information
    5. Claims that need verification
    
    Respond with a JSON object:
    {{
        "suspicious_claims": ["list of potentially false or unverifiable claims"],
        "contradictions": ["list of contradictory statements"],
        "verification_needed": ["list of claims that should be fact-checked"],
        "score": <integer 1-10>,
        "score_explanation": "<detailed explanation of score>"
    }}
    """
    return fact_llm.invoke(request).content

# ==============================================================================
#  AGENT CLASS
# ==============================================================================

class ContentQualityAgent:
    """Agent that runs every content-quality tool on a text and compiles a JSON report."""

    def __init__(self, model="gpt-4o-mini", temperature=0):
        """Create the chat model and build the agent executor.

        Args:
            model: Model name passed to ``ChatOpenAI``.
            temperature: Sampling temperature passed to ``ChatOpenAI``.
        """
        self.llm = ChatOpenAI(model=model, temperature=temperature)
        self.setup_agent()

    def setup_agent(self):
        """Register the tools, build the prompt and store the executor on ``self.agent_executor``."""
        # One Tool per check function, plus a Tavily search tool for fact verification.
        tools = [
            Tool(name="LengthAndStructureCheck", func=check_length_and_structure, description="Check word count, sentence count, paragraph structure, and overall organization."),
            Tool(name="FormattingCheck", func=check_formatting_and_presentation, description="Check formatting, presentation, and visual structure issues."),
            Tool(name="ProfanityCheck", func=check_profanity_and_appropriateness, description="Check for profanity, inappropriate language, and offensive content."),
            Tool(name="ClarityCheck", func=check_clarity_and_coherence, description="Check for clarity, coherence, logical flow, and organization."),
            Tool(name="GrammarCheck", func=check_grammar_and_language, description="Check grammar, spelling, punctuation, and language quality."),
            Tool(name="RedundancyCheck", func=check_redundancy_and_repetition, description="Check for redundancy, repetition, and verbose content."),
            Tool(name="HallucinationCheck", func=check_hallucinations_and_accuracy, description="Check for potential hallucinations, inaccuracies, and contradictions."),
            TavilySearchResults(name="FactCheckSearch", max_results=3, description="Verify factual claims and statements.")
        ]
        
        # The literal braces of the JSON example below are doubled ({{ }}) so the template
        # does not treat them as input variables; only {input} and {agent_scratchpad} are.
        prompt = ChatPromptTemplate.from_messages([
            ("system", """You are an expert Content Quality Analyst. Your goal is to provide a comprehensive quality assessment of text content.

            **Your Analysis Plan:**
            1. Run ALL available checks on the provided text
            2. Use the LengthAndStructureCheck for length and structure analysis
            3. Use the FormattingCheck for formatting and presentation
            4. Use the ProfanityCheck for appropriateness and profanity
            5. Use the ClarityCheck for clarity and coherence
            6. Use the GrammarCheck for grammar and language quality
            7. Use the RedundancyCheck for redundancy and repetition
            8. Use the HallucinationCheck for accuracy and potential hallucinations
            9. Use FactCheckSearch if you find specific factual claims that need verification
            
            **FINAL ANSWER FORMATTING:**
            Your final answer MUST be a single JSON object with this exact structure:
            {{
                "overall_score": <integer 1-10>,
                "category_scores": {{
                    "length_and_structure": <score 1-10>,
                    "formatting": <score 1-10>,
                    "profanity": <score 1-10>,
                    "clarity": <score 1-10>,
                    "grammar": <score 1-10>,
                    "redundancy": <score 1-10>,
                    "hallucinations": <score 1-10>
                }},
                "score_explanations": {{
                    "length_and_structure": "<explanation>",
                    "formatting": "<explanation>",
                    "profanity": "<explanation>",
                    "clarity": "<explanation>",
                    "grammar": "<explanation>",
                    "redundancy": "<explanation>",
                    "hallucinations": "<explanation>"
                }},
                "detailed_issues": {{
                    "critical_issues": ["list of critical problems"],
                    "moderate_issues": ["list of moderate problems"],
                    "minor_issues": ["list of minor problems"]
                }},
                "summary": "<comprehensive summary with specific suggestions for improvement>"
            }}
            """),
            ("human", "{input}"),
            ("placeholder", "{agent_scratchpad}"),
        ])
        
        agent = create_openai_functions_agent(self.llm, tools, prompt)
        self.agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

    async def validate_async(self, text: str) -> Dict[str, Any]:
        """Runs comprehensive content quality analysis on the provided text.

        Args:
            text: The content to analyse; it is wrapped in a short instruction
                before being sent to the agent.

        Returns:
            The JSON object parsed from the agent's output, or a dict with
            "error" and "raw_output" keys when no parseable JSON object is found
            or a KeyError is raised. Other exception types are not caught here.
        """
        
        prompt = f"Please provide a comprehensive quality analysis of the following text content:\n---\n{text}\n---"
        
        # Bound before the try so the except branch can call result.get() even
        # when ainvoke itself raised.
        result = {} 
        try:
            result = await self.agent_executor.ainvoke({"input": prompt})
            # Use regex to find the JSON object in case the LLM adds extra text
            # (greedy with DOTALL: spans from the first '{' to the last '}').
            json_match = re.search(r'\{.*\}', result['output'], re.DOTALL)
            if json_match:
                return json.loads(json_match.group(0))
            else:
                # Raised so the missing-JSON case shares the handler below.
                raise json.JSONDecodeError("No JSON object found in agent output.", result['output'], 0)
        except (json.JSONDecodeError, KeyError) as e:
            print(f"‚ùå Error during agent execution or parsing: {e}")
            return {"error": "Failed to generate a valid report.", "raw_output": result.get('output', 'No output was generated.')}

# ==============================================================================
#  MAIN EXECUTION BLOCK
# ==============================================================================
async def main():
    """Run the agent on a sample paragraph and print the resulting report.

    Stops early with an error message when either API key is missing from the
    environment.
    """
    required_keys = ("OPENAI_API_KEY", "TAVILY_API_KEY")
    if not all(os.getenv(key) for key in required_keys):
        print("‚ùå Error: API keys must be set in your .env file.")
        return

    print("üöÄ Initializing Content Quality Agent...")
    agent = ContentQualityAgent()

    # --- CASE 2: Large Paragraph Analysis ---
    # Deliberately poor sample: misspellings, shouting, symbol spam and wild claims.
    sample_text = """
    BREAKING NEWS!!! The goverment has SECRETLEY been using 5G towers to controll our minds and make us buy more STUFF we dont need!!! This is CIENTIFICALY proven by my frend who works at Area 51 and he told me that aliens are actualy running Facebook and Google to steal our THOUGHTS!!! Mark Zuckerberg is definatley an alien REPTILIAN who eats babys for breakfast FACT!!! Wake up SHEEPLE the truth is RIGHT THERE!!!! #conspiracy #truth #wakeup !@#$%^&*()  """

    print("\n\nüîç Running comprehensive analysis on paragraph content...")
    print("=" * 60)
    print_report(await agent.validate_async(sample_text))



def print_report(report: Dict[str, Any]):
    """Pretty-print a quality report produced by the agent.

    An error report (one containing an "error" key) prints the error and the
    raw output only. Otherwise the overall score, the issues grouped by
    severity, a per-category breakdown and the summary are printed.
    """
    divider = "=" * 70
    print("\n‚úÖ COMPREHENSIVE CONTENT QUALITY REPORT:")
    print(divider)

    if "error" in report:
        print(f"‚ùå An error occurred: {report['error']}")
        print(f"Raw Output: {report.get('raw_output')}")
        print(divider)
        return

    print(f"üèÜ OVERALL QUALITY SCORE: {report.get('overall_score', 'N/A')} / 10")

    # Issue lists by severity: (report key, section heading, bullet prefix).
    severity_sections = (
        ('critical_issues', "\nüö® CRITICAL ISSUES:", "   ‚ùå "),
        ('moderate_issues', "\n‚ö†Ô∏è  MODERATE ISSUES:", "   ‚ö†Ô∏è  "),
        ('minor_issues', "\nüí° MINOR ISSUES:", "   üí° "),
    )
    grouped_issues = report.get('detailed_issues', {})
    for key, heading, bullet in severity_sections:
        entries = grouped_issues.get(key)
        if not entries:
            continue  # Empty or missing sections are omitted entirely.
        print(heading)
        for entry in entries:
            print(f"{bullet}{entry}")

    print("\n--- DETAILED CATEGORY BREAKDOWN ---")
    scores = report.get('category_scores', {})
    explanations = report.get('score_explanations', {})

    labels = {
        'length_and_structure': 'Length & Structure',
        'formatting': 'Formatting',
        'profanity': 'Profanity & Appropriateness',
        'clarity': 'Clarity & Coherence',
        'grammar': 'Grammar & Language',
        'redundancy': 'Redundancy & Repetition',
        'hallucinations': 'Accuracy & Hallucinations',
    }
    # Lowest score that still earns each marker, checked from best to worst.
    thresholds = ((9, "üü¢"), (7, "üü°"), (5, "üü†"))

    for key, label in labels.items():
        value = scores.get(key, 'N/A')
        reason = explanations.get(key, "No explanation provided.")

        # Non-integer scores (e.g. the 'N/A' placeholder) get the neutral marker.
        marker = "‚ö™"
        if isinstance(value, int):
            marker = "üî¥"
            for floor, symbol in thresholds:
                if value >= floor:
                    marker = symbol
                    break

        print(f"\n{marker} {label}: {value}/10")
        print(f"   ‚îî‚îÄ {reason}")

    print("\n--- SUMMARY & RECOMMENDATIONS ---")
    print(report.get('summary', 'No summary provided.'))

    print(divider)

# Run the main async function
# NOTE: `await` at module level only works where the interpreter supports
# top-level await (such as an IPython/Jupyter cell); in a plain Python script
# it is a SyntaxError and asyncio.run(main()) would be needed instead.
if __name__ == "__main__":
    await main()


üöÄ Initializing Content Quality Agent...


üîç Running comprehensive analysis on paragraph content...


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `LengthAndStructureCheck` with `BREAKING NEWS!!! The goverment has SECRETLEY been using 5G towers to controll our minds and make us buy more STUFF we dont need!!! This is CIENTIFICALY proven by my frend who works at Area 51 and he told me that aliens are actualy running Facebook and Google to steal our THOUGHTS!!! Mark Zuckerberg is definatley an alien REPTILIAN who eats babys for breakfast FACT!!! Wake up SHEEPLE the truth is RIGHT THERE!!!! #conspiracy #truth #wakeup !@#$%^&*()  `


[0m[36;1m[1;3m{"word_count": 76, "sentences": 7, "paragraphs": 1, "avg_sentence_length": 10.9, "issues": [], "score": 10, "score_explanation": "Length & Structure score: 10/10. 76 words, 7 sentences, 1 paragraphs."}[0m[32;1m[1;3m
Invoking: `FormattingCheck` with `BREAKING NEWS!!! The goverment has SECRETLEY been using 5G towe

In [5]:
import json
import re
import os
from typing import Dict, Any, Union

from langchain_openai import ChatOpenAI
from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain_core.prompts import ChatPromptTemplate
from langchain.tools import Tool
from langchain_community.tools.tavily_search import TavilySearchResults
from dotenv import load_dotenv
import textstat

load_dotenv()

# ==============================================================================
#  TOOL LOGIC FUNCTIONS
# ==============================================================================

def check_length_and_structure(text: str) -> str:
    """Report basic size and structure metrics for ``text`` as a JSON string.

    Words are whitespace-separated tokens, sentences are the non-empty pieces
    left after splitting on runs of '.', '!' or '?', and paragraphs are the
    non-empty pieces left after splitting on two consecutive newlines.
    The score starts at 10, loses points for every rule that fires, and is
    never reported below 1.
    """
    word_count = len(text.split())
    sentence_count = sum(1 for piece in re.split(r'[.!?]+', text) if piece.strip())
    paragraph_count = sum(1 for piece in text.split('\n\n') if piece.strip())

    # Each rule: (did it fire?, message for the report, points deducted).
    rules = (
        (word_count < 15,
         f"Content is very short with only {word_count} words.", 3),
        (word_count > 150 and paragraph_count == 1,
         "Long content is not broken into paragraphs, making it hard to read.", 2),
        (sentence_count < 2 and word_count > 20,
         "Content consists of a single long sentence; consider breaking it up.", 2),
    )
    issues = [message for fired, message, _ in rules if fired]
    deducted = sum(points for fired, _, points in rules if fired)
    final_score = max(1, 10 - deducted)

    return json.dumps({
        "word_count": word_count,
        "sentences": sentence_count,
        "paragraphs": paragraph_count,
        "issues": issues,
        "score": final_score,
        "score_explanation": f"Structure score: {final_score}/10. Based on {word_count} words, {sentence_count} sentences, {paragraph_count} paragraphs."
    })

def check_grammar_with_llm(text: str) -> str:
    """Ask a chat model to proofread ``text`` and return its reply verbatim.

    The model is instructed to answer with a JSON object holding ``errors``,
    ``score`` and ``score_explanation``. The reply content is handed back
    as-is; it is neither parsed nor validated here.
    """
    print("üß† Using LLM to check grammar...")
    proofreader = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    instructions = f"""
    You are an expert proofreader. Analyze the following text for grammar and spelling mistakes.
    Respond with a JSON object with:
    - "errors": ["list of concise error descriptions"]
    - "score": integer from 1-10 (10 = perfect, deduct points for each error)
    - "score_explanation": brief explanation of why this score was given
    
    Text: "{text}"
    """
    return proofreader.invoke(instructions).content

def check_readability_with_textstat(text: str) -> str:
    """Grade the readability of ``text`` from its Flesch Reading Ease value.

    Blank input short-circuits with an "unassessable" result. Otherwise the
    value computed by ``textstat.flesch_reading_ease`` is mapped onto a named
    level and a 1-10 grade, and everything is returned as a JSON string.
    """
    if not text.strip():
        return json.dumps({"score": 0, "level": "unassessable", "readability_score": 1, "score_explanation": "Cannot assess readability of empty text."})

    ease = textstat.flesch_reading_ease(text)

    # (exclusive upper bound on the Flesch value, level name, grade), lowest
    # band first; anything at or above the last bound is "Very Easy".
    bands = (
        (30, "Very Confusing (College Graduate)", 3),
        (60, "Difficult", 6),
        (80, "Fairly Easy", 8),
    )
    level, readability_score = next(
        ((name, grade) for bound, name, grade in bands if ease < bound),
        ("Very Easy", 10),
    )

    return json.dumps({
        "flesch_reading_ease_score": ease,
        "level": level,
        "readability_score": readability_score,
        "score_explanation": f"Readability score: {readability_score}/10. Flesch score of {ease:.1f} indicates {level.lower()} reading level."
    })

def check_professionalism(text: str) -> str:
    """Flag profanity, shouting and symbol spam in ``text``.

    Three independent checks each cost points from a starting score of 10:
    a small profanity word list (5), an all-caps word of four or more letters
    combined with more than 30% uppercase characters overall (3), and a run
    of four or more characters from ``!?@#$%^&*()`` (2). The result is a JSON
    string; the reported score is never below 1.
    """
    has_profanity = re.search(r'\b(fuck|shit|damn|bitch|asshole)\b', text, re.IGNORECASE) is not None
    # The all-caps word test comes first so the ratio is never computed for
    # empty text (which would divide by zero).
    is_shouting = (
        re.search(r'\b[A-Z]{4,}\b', text) is not None
        and sum(map(str.isupper, text)) / len(text) > 0.3
    )
    has_symbol_run = re.search(r'[!?@#$%^&*()]{4,}', text) is not None

    findings = (
        (has_profanity, "Inappropriate or profane language was found.", 5),
        (is_shouting, "Excessive capitalization is used, which appears unprofessional.", 3),
        (has_symbol_run, "Excessive punctuation or symbols are used.", 2),
    )
    issues = [message for hit, message, _ in findings if hit]
    final_score = max(1, 10 - sum(cost for hit, _, cost in findings if hit))

    return json.dumps({
        "issues": issues,
        "score": final_score,
        "score_explanation": f"Professionalism score: {final_score}/10. {len(issues)} professional issues detected."
    })

def check_redundancy(text: str) -> str:
    """Measure how many sentences in ``text`` are exact repeats of one another.

    Sentences are compared case-insensitively after trimming. Only texts with
    more than two sentences can be penalised: when fewer than half of them are
    unique, an issue is recorded and the score becomes the unique ratio scaled
    to 1-10. Only whole-sentence repetition is examined. Returns a JSON string.
    """
    normalized = [part.lower().strip() for part in re.split(r'[.!?]+', text) if part.strip()]
    total = len(normalized)
    # max(1, total) keeps the ratio defined for text without any sentence.
    unique_ratio = len(set(normalized)) / max(1, total)

    issues = []
    score = 10
    if total > 2 and unique_ratio < 0.5:
        repeated_pct = 100 - unique_ratio*100
        issues.append(f"High sentence redundancy detected. {repeated_pct:.0f}% of sentences are repetitive.")
        score = max(1, int(unique_ratio * 10))

    return json.dumps({
        "redundancy_issues": issues,
        "score": score,
        "score_explanation": f"Redundancy score: {score}/10. Unique sentence ratio: {unique_ratio:.2f}"
    })

def check_itext_payload_faithfulness(payload_str: str) -> str:
    """
    Analyzes an iText JSON payload. It checks for:
    1. Contextual relevance between message and content
    2. Consistency between the message and data
    3. Faithfulness and relevance between the 'content', 'simplified', and 'elaborated' versions.

    ``payload_str`` is expected to be a JSON object with a ``message`` string
    and a ``data`` list whose first entry carries a nested ``data`` object
    holding ``content``, ``simplified`` and ``elaborated``.

    Returns the chat model's reply content unchanged, or a JSON string of the
    form ``{"error": ...}`` when the payload cannot be read. No model call is
    made in the error case.
    """
    print("üß† Using LLM to analyze iText payload...")
    try:
        payload = json.loads(payload_str)
        message = payload.get("message", "")
        content_data = payload.get("data", [{}])[0].get("data", {})
        original = content_data.get("content", "")
        simplified = content_data.get("simplified", "")
        elaborated = content_data.get("elaborated", "")
    except (json.JSONDecodeError, IndexError, KeyError, AttributeError, TypeError) as e:
        # AttributeError/TypeError cover valid JSON of the wrong shape: a
        # top-level list or string, "data": null, or a non-dict entry.
        return json.dumps({"error": f"Invalid iText payload structure: {e}"})

    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    prompt = f"""
    You are an expert iText payload evaluator. Analyze the following payload for contextual relevance and faithfulness:
    
    Message: "{message}"
    Original Content: "{original}"
    Simplified Content: "{simplified}"
    Elaborated Content: "{elaborated}"

    Evaluate these aspects:
    
    1. **Message-Content Relevance**: Does the content actually relate to what the message indicates? 
       For example, if message mentions "data science" but content is about "astronomy", this is a MAJOR ERROR.
    
    2. **Content Consistency**: Are all three versions (original, simplified, elaborated) about the same topic?
    
    3. **Faithfulness**: Do simplified/elaborated versions maintain the original meaning without adding incorrect information?
    
    4. **Success Message Validation**: If the message indicates "success", does the content seem properly generated?

    Return ONLY a JSON object with your findings:
    {{
        "message_content_relevance": <int 1-10>,
        "content_consistency": <int 1-10>,
        "faithfulness_score": <int 1-10>,
        "overall_faithfulness_score": <int 1-10>,
        "contextual_errors": ["list of specific contextual mismatches found"],
        "faithfulness_issues": ["list of faithfulness problems between versions"],
        "score_explanation": "<Detailed explanation covering all aspects>",
        "critical_error": <boolean - true if major contextual mismatch exists>
    }}
    """
    response = llm.invoke(prompt)
    return response.content

# ==============================================================================
#  AGENT CLASS
# ==============================================================================

class ContentQualityAgent:
    """Tool-calling agent that produces a JSON quality report for text or iText payloads.

    Construction creates the chat model and immediately wires up the agent;
    ``validate_async`` is the entry point for running an analysis.
    """

    def __init__(self, model="gpt-4o-mini", temperature=0):
        """Create the chat model with the given settings and build the agent executor."""
        self.llm = ChatOpenAI(model=model, temperature=temperature)
        self.setup_agent()

    def setup_agent(self):
        """Register the analysis tools and the system prompt, then store the executor on ``self``."""
        toolbox = [
            Tool(
                name="iTextPayloadFaithfulnessCheck",
                func=check_itext_payload_faithfulness,
                description="Use this ONLY for a JSON input representing an iText payload. It checks contextual relevance between message and content, and faithfulness between content versions.",
            ),
            Tool(
                name="GrammarAndSpellingCheck",
                func=check_grammar_with_llm,
                description="Use to check a piece of text for spelling and grammatical errors.",
            ),
            Tool(
                name="ReadabilityCheck",
                func=check_readability_with_textstat,
                description="Use to get the readability score of a piece of text.",
            ),
            Tool(
                name="ProfessionalismCheck",
                func=check_professionalism,
                description="Use to check a piece of text for unprofessional language.",
            ),
            Tool(
                name="LengthAndStructureCheck",
                func=check_length_and_structure,
                description="Use to get word count and structure of a piece of text.",
            ),
            Tool(
                name="RedundancyCheck",
                func=check_redundancy,
                description="Use to check a piece of text for repetitive sentences.",
            ),
            TavilySearchResults(
                name="FactCheckSearch",
                max_results=3,
                description="Use to verify factual claims in a piece of text.",
            ),
        ]

        # Doubled braces are literal braces in the prompt template; only
        # {input} and {agent_scratchpad} below are template variables.
        system_message = """You are an expert Quality Assurance assistant. Your goal is to provide a complete quality report.

            You will receive one of two types of input:
            1.  A plain text paragraph.
            2.  A JSON string representing an "iText payload".

            **Your Plan:**
            - If the input is an iText payload (JSON), you MUST use the `iTextPayloadFaithfulnessCheck` tool on the entire JSON. You should ALSO run standard checks (grammar, professionalism, etc.) on the 'content' field within the payload.
            - If the input is plain text, just run the standard checks on it.
            
            **CRITICAL**: For iText payloads, pay special attention to contextual relevance between the message and content. If there's a major mismatch (e.g., message about "data science" but content about "astronomy"), this should significantly impact the overall score and be prominently mentioned in the summary.
            
            After executing your plan, compile all results into the final JSON report.

            **FINAL ANSWER FORMATTING INSTRUCTIONS:**
            Your final answer MUST be a single JSON object with the specified structure.
            {{
                "overall_score": <An integer score from 1-10>,
                "category_scores": {{
                    "grammar_and_spelling": <Score 1-10>,
                    "readability": <Score 1-10>,
                    "professionalism_and_tone": <Score 1-10>,
                    "factual_accuracy": <Score 1-10. Default to 10 if no claims to check.>,
                    "redundancy": <Score 1-10>,
                    "faithfulness": <Score 1-10. Only applies to iText. Default to 10 for plain text.>
                }},
                "score_explanations": {{
                    "grammar_and_spelling": "<Explanation for grammar score>",
                    "readability": "<Explanation for readability score>",
                    "professionalism_and_tone": "<Explanation for professionalism score>",
                    "factual_accuracy": "<Explanation for factual accuracy score>",
                    "redundancy": "<Explanation for redundancy score>",
                    "faithfulness": "<Explanation for faithfulness score including contextual relevance>"
                }},
                "contextual_errors": ["<List of contextual mismatches between message and content>"],
                "summary": "<A natural language summary of key issues and suggestions for improvement. PROMINENTLY mention any contextual errors.>"
            }}
            """
        template = ChatPromptTemplate.from_messages([
            ("system", system_message),
            ("human", "{input}"),
            ("placeholder", "{agent_scratchpad}"),
        ])

        agent = create_openai_functions_agent(self.llm, toolbox, template)
        self.agent_executor = AgentExecutor(agent=agent, tools=toolbox, verbose=True)

    async def validate_async(self, input_data: Union[str, Dict[str, Any]]) -> Dict[str, Any]:
        """Run the agent on a paragraph (``str``) or an iText payload (``dict``).

        A dict is serialised to indented JSON before being embedded in the
        request. The first ``{`` through the last ``}`` of the agent's output
        is parsed as the report. If no such span exists, the span is not valid
        JSON, or the result has no ``output`` key, a dict with ``error`` and
        ``raw_output`` is returned instead of raising.
        """
        is_payload = isinstance(input_data, dict)
        # A payload is serialised for the agent; a plain paragraph is passed through.
        input_str = json.dumps(input_data, indent=2) if is_payload else input_data
        analysis_subject = "the following iText payload" if is_payload else "the following text"

        prompt = f"Please provide a comprehensive quality analysis of {analysis_subject}:\n---\n{input_str}\n---"

        # Pre-bound so the error handler can read it even when ainvoke itself fails.
        result = {}
        try:
            result = await self.agent_executor.ainvoke({"input": prompt})
            agent_output = result['output']
            # Greedy match: tolerates extra text the LLM puts around the JSON object.
            json_match = re.search(r'\{.*\}', agent_output, re.DOTALL)
            if json_match is None:
                raise json.JSONDecodeError("No JSON object found in agent output.", agent_output, 0)
            return json.loads(json_match.group(0))
        except (json.JSONDecodeError, KeyError) as e:
            print(f"‚ùå Error during agent execution or parsing: {e}")
            return {"error": "Failed to generate a valid report.", "raw_output": result.get('output', 'No output was generated.')}

# ==============================================================================
#  MAIN EXECUTION BLOCK
# ==============================================================================
async def main():
    """Demo entry point: analyse one repetitive sample paragraph and print the report.

    Prints an error and returns early when either ``OPENAI_API_KEY`` or
    ``TAVILY_API_KEY`` is missing (or empty) in the environment.
    """
    required_keys = ("OPENAI_API_KEY", "TAVILY_API_KEY")
    if not all(os.getenv(key) for key in required_keys):
        print("‚ùå Error: API keys must be set in your .env file.")
        return

    print("üöÄ Initializing Content Quality Agent...")
    agent = ContentQualityAgent()

    # --- CASE 1: Plain Text Input ---
    sample_text = """
Data analysis is important. Data analysis helps businesses. Businesses use data analysis. Data analysis is used by businesses to analyze data. Analyzing data is what data analysis does. Data analysis analyzes data for businesses. Businesses need data analysis to analyze their data. Data analysis is the analysis of data. Data is analyzed through data analysis. Data analysis is important for data.
    """
    print("\n\nüîç Running analysis on Plain Text Input...")
    print("=" * 60)
    plain_text_report = await agent.validate_async(sample_text)
    print_report(plain_text_report)

    # # --- CASE 2: iText Payload with CONTEXTUAL MISMATCH ---
    # itext_payload = {
    #     "message": "Successfully generated data science content",
    #     "data": [
    #         {
    #             "type": "iText",
    #             "data": {
    #                 "content": "The sun is a star at the center of the Solar System. It is a nearly perfect ball of hot plasma. The Sun's diameter is about 109 times that of Earth.",
    #                 "simplified": "The sun is a big star in the middle of our solar system.",
    #                 "elaborated": "The Sun, a G-type main-sequence star (G2V), is the focal point of our Solar System. Comprising over 99.8% of the system's mass, it is an immense sphere of incandescent plasma, with a diameter of approximately 1.39 million kilometers, roughly 109 times that of our planet."
    #             }
    #         }
    #     ]
    # }
    # print("\n\nüîç Running analysis on iText Payload...")
    # print("=" * 60)
    # report2 = await validator.validate_async(itext_payload)
    # print_report(report2)

def print_report(report: Dict[str, Any]):
    """Pretty-print an agent report (or its error form) to stdout.

    An error report shows the error message and the raw agent output. A normal
    report shows the overall score, any contextual errors, one line per
    category score with its explanation, and the summary. Both forms are
    framed by 60-character dashed rules.
    """
    divider = "-" * 60
    print("\n‚úÖ FINAL AGENT REPORT:")
    print(divider)

    if "error" in report:
        print(f"An error occurred: {report['error']}")
        print(f"Raw Output: {report.get('raw_output')}")
        print(divider)
        return

    print(f"üèÜ Overall Quality Score: {report.get('overall_score', 'N/A')} / 10")

    # Contextual mismatches get their own section, shown only when present.
    mismatches = report.get('contextual_errors', [])
    if mismatches:
        print("\nüö® CONTEXTUAL ERRORS DETECTED:")
        for error in mismatches:
            print(f"   ‚ùå {error}")

    print("\n--- Category Breakdown ---")
    explanations = report.get('score_explanations', {})
    for key, value in report.get('category_scores', {}).items():
        label = key.replace('_', ' ').title()
        reason = explanations.get(key, "No explanation provided.")
        print(f"   - {label}: {value}/10")
        print(f"     ‚îî‚îÄ {reason}")

    print("\n--- Summary & Suggestions ---")
    print(report.get('summary', 'No summary provided.'))

    print(divider)

# Run the main async function
# NOTE: a bare top-level `await` is only accepted by hosts that run cell code
# inside an event loop (e.g. IPython/Jupyter). The plain `python` interpreter
# rejects `await` outside an `async def` with a SyntaxError.
if __name__ == "__main__":
    await main()


üöÄ Initializing Content Quality Agent...


üîç Running analysis on Plain Text Input...


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `GrammarAndSpellingCheck` with `Data analysis is important. Data analysis helps businesses. Businesses use data analysis. Data analysis is used by businesses to analyze data. Analyzing data is what data analysis does. Data analysis analyzes data for businesses. Businesses need data analysis to analyze their data. Data analysis is the analysis of data. Data is analyzed through data analysis. Data analysis is important for data.`


[0müß† Using LLM to check grammar...
[33;1m[1;3m```json
{
  "errors": [
    "Repetitive use of 'data analysis' and 'data' makes the text redundant.",
    "Lack of variety in sentence structure leads to monotony."
  ],
  "score": 6,
  "score_explanation": "The text is grammatically correct and free of spelling mistakes, but it suffers from excessive repetition and lack of variety in sentence struct

In [None]:
import json
import re
import os
from typing import Dict, Any, Union

from langchain_openai import ChatOpenAI
from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain_core.prompts import ChatPromptTemplate
from langchain.tools import Tool
from langchain_community.tools.tavily_search import TavilySearchResults
from dotenv import load_dotenv
import textstat

load_dotenv()

# ==============================================================================
#  TEXT CLEANING FUNCTION
# ==============================================================================

def clean_paragraph_text(text: str) -> str:
    """
    Cleans paragraph text by removing unwanted characters, escape sequences, and formatting issues.

    Steps, in order: decode literal backslash escapes, drop JSON-style
    brackets/braces/quotes, drop http(s) URLs, squeeze repeated punctuation,
    drop characters outside a small allow-list, and finally collapse every
    whitespace run (including line breaks) to a single space.

    Returns the cleaned single-line text, or "" for empty/None input. The
    result never contains two consecutive spaces.
    """
    if not text:
        return ""

    # Decode literal (backslash-escaped) sequences left over from serialization
    text = text.replace('\\"', '"')
    text = text.replace('\\n', '\n')
    text = text.replace('\\t', ' ')
    text = text.replace('\\r', '')
    text = text.replace('\\\\', '\\')

    # Remove JSON-like artifacts ('[' is escaped to avoid the nested-set FutureWarning)
    text = re.sub(r'[{}\[\]"]', '', text)

    # Remove URLs
    text = re.sub(r'https?://[^\s]+', '', text)

    # Remove excessive punctuation
    text = re.sub(r'[.]{3,}', '...', text)
    text = re.sub(r'[!]{2,}', '!', text)
    text = re.sub(r'[?]{2,}', '?', text)

    # Remove special characters that don't belong in normal text
    text = re.sub(r'[^\w\s.,!?;:()\-\'\"]+', '', text)

    # Collapse whitespace LAST: the removals above can leave gaps such as
    # "rock  roll" (from "rock & roll"), which an earlier collapse would miss.
    text = re.sub(r'\s+', ' ', text)

    return text.strip()

# ==============================================================================
#  TOOL LOGIC FUNCTIONS
# ==============================================================================

def check_length_and_structure(text: str) -> str:
    """Report basic size and structure metrics for ``text`` as a JSON string.

    Words are whitespace-separated tokens, sentences are the non-empty pieces
    left after splitting on runs of '.', '!' or '?', and paragraphs are the
    non-empty pieces left after splitting on two consecutive newlines.
    The score starts at 10, loses points for every rule that fires, and is
    never reported below 1.
    """
    word_count = len(text.split())
    sentence_count = sum(1 for piece in re.split(r'[.!?]+', text) if piece.strip())
    paragraph_count = sum(1 for piece in text.split('\n\n') if piece.strip())

    # Each rule: (did it fire?, message for the report, points deducted).
    rules = (
        (word_count < 15,
         f"Content is very short with only {word_count} words.", 3),
        (word_count > 150 and paragraph_count == 1,
         "Long content is not broken into paragraphs, making it hard to read.", 2),
        (sentence_count < 2 and word_count > 20,
         "Content consists of a single long sentence; consider breaking it up.", 2),
    )
    issues = [message for fired, message, _ in rules if fired]
    deducted = sum(points for fired, _, points in rules if fired)
    final_score = max(1, 10 - deducted)

    return json.dumps({
        "word_count": word_count,
        "sentences": sentence_count,
        "paragraphs": paragraph_count,
        "issues": issues,
        "score": final_score,
        "score_explanation": f"Structure score: {final_score}/10. Based on {word_count} words, {sentence_count} sentences, {paragraph_count} paragraphs."
    })

def check_grammar_with_llm(text: str) -> str:
    """Ask a chat model to proofread ``text`` and return its reply verbatim.

    The model is instructed to answer with a JSON object holding ``errors``,
    ``score`` and ``score_explanation``. The reply content is handed back
    as-is; it is neither parsed nor validated here.
    """
    print("üß† Using LLM to check grammar...")
    proofreader = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    instructions = f"""
    You are an expert proofreader. Analyze the following text for grammar and spelling mistakes.
    Respond with a JSON object with:
    - "errors": ["list of concise error descriptions"]
    - "score": integer from 1-10 (10 = perfect, deduct points for each error)
    - "score_explanation": brief explanation of why this score was given
    
    Text: "{text}"
    """
    return proofreader.invoke(instructions).content

def check_readability_with_textstat(text: str) -> str:
    """Grade the readability of ``text`` from its Flesch Reading Ease value.

    Blank input short-circuits with an "unassessable" result. Otherwise the
    value computed by ``textstat.flesch_reading_ease`` is mapped onto a named
    level and a 1-10 grade, and everything is returned as a JSON string.
    """
    if not text.strip():
        return json.dumps({"score": 0, "level": "unassessable", "readability_score": 1, "score_explanation": "Cannot assess readability of empty text."})

    ease = textstat.flesch_reading_ease(text)

    # (exclusive upper bound on the Flesch value, level name, grade), lowest
    # band first; anything at or above the last bound is "Very Easy".
    bands = (
        (30, "Very Confusing (College Graduate)", 3),
        (60, "Difficult", 6),
        (80, "Fairly Easy", 8),
    )
    level, readability_score = next(
        ((name, grade) for bound, name, grade in bands if ease < bound),
        ("Very Easy", 10),
    )

    return json.dumps({
        "flesch_reading_ease_score": ease,
        "level": level,
        "readability_score": readability_score,
        "score_explanation": f"Readability score: {readability_score}/10. Flesch score of {ease:.1f} indicates {level.lower()} reading level."
    })

def check_professionalism(text: str) -> str:
    """Flag profanity, shouting and symbol spam in ``text``.

    Three independent checks each cost points from a starting score of 10:
    a small profanity word list (5), an all-caps word of four or more letters
    combined with more than 30% uppercase characters overall (3), and a run
    of four or more characters from ``!?@#$%^&*()`` (2). The result is a JSON
    string; the reported score is never below 1.
    """
    has_profanity = re.search(r'\b(fuck|shit|damn|bitch|asshole)\b', text, re.IGNORECASE) is not None
    # The all-caps word test comes first so the ratio is never computed for
    # empty text (which would divide by zero).
    is_shouting = (
        re.search(r'\b[A-Z]{4,}\b', text) is not None
        and sum(map(str.isupper, text)) / len(text) > 0.3
    )
    has_symbol_run = re.search(r'[!?@#$%^&*()]{4,}', text) is not None

    findings = (
        (has_profanity, "Inappropriate or profane language was found.", 5),
        (is_shouting, "Excessive capitalization is used, which appears unprofessional.", 3),
        (has_symbol_run, "Excessive punctuation or symbols are used.", 2),
    )
    issues = [message for hit, message, _ in findings if hit]
    final_score = max(1, 10 - sum(cost for hit, _, cost in findings if hit))

    return json.dumps({
        "issues": issues,
        "score": final_score,
        "score_explanation": f"Professionalism score: {final_score}/10. {len(issues)} professional issues detected."
    })

def check_redundancy(text: str) -> str:
    """Measure how many sentences in ``text`` are exact repeats of one another.

    Sentences are compared case-insensitively after trimming. Only texts with
    more than two sentences can be penalised: when fewer than half of them are
    unique, an issue is recorded and the score becomes the unique ratio scaled
    to 1-10. Only whole-sentence repetition is examined. Returns a JSON string.
    """
    normalized = [part.lower().strip() for part in re.split(r'[.!?]+', text) if part.strip()]
    total = len(normalized)
    # max(1, total) keeps the ratio defined for text without any sentence.
    unique_ratio = len(set(normalized)) / max(1, total)

    issues = []
    score = 10
    if total > 2 and unique_ratio < 0.5:
        repeated_pct = 100 - unique_ratio*100
        issues.append(f"High sentence redundancy detected. {repeated_pct:.0f}% of sentences are repetitive.")
        score = max(1, int(unique_ratio * 10))

    return json.dumps({
        "redundancy_issues": issues,
        "score": score,
        "score_explanation": f"Redundancy score: {score}/10. Unique sentence ratio: {unique_ratio:.2f}"
    })

def clean_text_tool(text: str) -> str:
    """Agent-facing wrapper around ``clean_paragraph_text``.

    Returns a JSON string with the cleaned text plus the original length,
    the cleaned length and the difference between the two.
    """
    cleaned_text = clean_paragraph_text(text)
    length_before, length_after = len(text), len(cleaned_text)
    return json.dumps({
        "cleaned_text": cleaned_text,
        "original_length": length_before,
        "cleaned_length": length_after,
        "characters_removed": length_before - length_after
    })

# ==============================================================================
#  SIMPLIFIED AGENT CLASS
# ==============================================================================

class ParagraphQualityAgent:
    """Tool-calling agent that cleans a paragraph and reports on its quality.

    Construction creates the chat model and immediately wires up the agent.
    ``analyze_paragraph_async`` runs the full agent; ``analyze_paragraph_sync``
    only cleans the text locally and returns a placeholder report.
    """

    def __init__(self, model="gpt-4o-mini", temperature=0):
        """Create the chat model with the given settings and build the agent executor."""
        self.llm = ChatOpenAI(model=model, temperature=temperature)
        self.setup_agent()

    def setup_agent(self):
        """Register the cleaning/analysis tools and the system prompt, then store the executor on ``self``."""
        toolbox = [
            Tool(
                name="CleanText",
                func=clean_text_tool,
                description="Use to clean and remove unwanted characters from text.",
            ),
            Tool(
                name="GrammarAndSpellingCheck",
                func=check_grammar_with_llm,
                description="Use to check a piece of text for spelling and grammatical errors.",
            ),
            Tool(
                name="ReadabilityCheck",
                func=check_readability_with_textstat,
                description="Use to get the readability score of a piece of text.",
            ),
            Tool(
                name="ProfessionalismCheck",
                func=check_professionalism,
                description="Use to check a piece of text for unprofessional language.",
            ),
            Tool(
                name="LengthAndStructureCheck",
                func=check_length_and_structure,
                description="Use to get word count and structure of a piece of text.",
            ),
            Tool(
                name="RedundancyCheck",
                func=check_redundancy,
                description="Use to check a piece of text for repetitive sentences.",
            ),
            TavilySearchResults(
                name="FactCheckSearch",
                max_results=3,
                description="Use to verify factual claims in a piece of text.",
            ),
        ]

        # Doubled braces are literal braces in the prompt template; only
        # {input} and {agent_scratchpad} below are template variables.
        system_message = """You are an expert Text Quality Analyst. Your goal is to clean and analyze paragraph text.

            **Your Process:**
            1. First, use the CleanText tool to remove unwanted characters and clean the input text
            2. Then run quality analysis on the cleaned text using all available tools:
               - Grammar and Spelling Check
               - Readability Check  
               - Professionalism Check
               - Length and Structure Check
               - Redundancy Check
               - Fact Check (if needed for factual claims)

            **FINAL ANSWER FORMATTING:**
            Your final answer MUST be a single JSON object:
            {{
                "overall_score": <integer 1-10>,
                "cleaned_text": "<the cleaned version of the input text>",
                "original_vs_cleaned": {{
                    "original_length": <number>,
                    "cleaned_length": <number>,
                    "characters_removed": <number>
                }},
                "category_scores": {{
                    "grammar_and_spelling": <1-10>,
                    "readability": <1-10>,
                    "professionalism_and_tone": <1-10>,
                    "factual_accuracy": <1-10>,
                    "redundancy": <1-10>,
                    "content_structure": <1-10>
                }},
                "score_explanations": {{
                    "grammar_and_spelling": "<explanation>",
                    "readability": "<explanation>",
                    "professionalism_and_tone": "<explanation>",
                    "factual_accuracy": "<explanation>",
                    "redundancy": "<explanation>",
                    "content_structure": "<explanation>"
                }},
                "issues_found": ["<list of issues>"],
                "summary": "<overall quality summary and improvement suggestions>"
            }}
            """
        template = ChatPromptTemplate.from_messages([
            ("system", system_message),
            ("human", "{input}"),
            ("placeholder", "{agent_scratchpad}"),
        ])

        agent = create_openai_functions_agent(self.llm, toolbox, template)
        self.agent_executor = AgentExecutor(agent=agent, tools=toolbox, verbose=True)

    async def analyze_paragraph_async(self, paragraph: str) -> Dict[str, Any]:
        """Run the agent on ``paragraph`` and return its parsed JSON report.

        The first ``{`` through the last ``}`` of the agent's output is parsed
        as the report. If no such span exists, the span is not valid JSON, or
        the result has no ``output`` key, a dict with ``error`` and
        ``raw_output`` is returned instead of raising.
        """
        prompt = f"Please clean and analyze the following paragraph:\n---\n{paragraph}\n---"

        # Pre-bound so the error handler can read it even when ainvoke itself fails.
        result = {}
        try:
            result = await self.agent_executor.ainvoke({"input": prompt})
            agent_output = result['output']
            # Greedy match: tolerates extra text around the JSON object.
            json_match = re.search(r'\{.*\}', agent_output, re.DOTALL)
            if json_match is None:
                raise json.JSONDecodeError("No JSON object found in agent output.", agent_output, 0)
            return json.loads(json_match.group(0))
        except (json.JSONDecodeError, KeyError) as e:
            print(f"‚ùå Error during analysis: {e}")
            return {"error": "Failed to generate a valid report.", "raw_output": result.get('output', 'No output generated.')}

    def analyze_paragraph_sync(self, paragraph: str) -> Dict[str, Any]:
        """Clean ``paragraph`` locally and return a minimal report without using the agent.

        The ``overall_score`` is a fixed placeholder value; no per-category
        analysis is performed on this path.
        """
        cleaned_text = clean_paragraph_text(paragraph)
        original_length = len(paragraph)
        cleaned_length = len(cleaned_text)
        removed = original_length - cleaned_length

        return {
            "overall_score": 8,  # placeholder; not derived from any individual scores
            "cleaned_text": cleaned_text,
            "original_vs_cleaned": {
                "original_length": original_length,
                "cleaned_length": cleaned_length,
                "characters_removed": removed
            },
            "summary": f"Text cleaned successfully. Removed {removed} unwanted characters."
        }

# ==============================================================================
#  MAIN EXECUTION
# ==============================================================================

async def main():
    """Demo entry point: clean a noisy sample paragraph, then run the full agent on it.

    Prints an error and returns early when either ``OPENAI_API_KEY`` or
    ``TAVILY_API_KEY`` is missing (or empty) in the environment.
    """
    required_keys = ("OPENAI_API_KEY", "TAVILY_API_KEY")
    if not all(os.getenv(key) for key in required_keys):
        print("‚ùå Error: API keys must be set in your .env file.")
        return

    print("üöÄ Initializing Paragraph Quality Agent...")
    analyzer = ParagraphQualityAgent()

    # Example paragraph with unwanted characters
    test_paragraph = """This is a \"sample paragraph\" with \\n unwanted characters and \\t formatting issues. 
    It contains some https://example.com/long-url-here and other artifacts like [brackets] and {braces}.
    The text also has    excessive   spacing and multiple....dots!!!"""
    original_length = len(test_paragraph)

    print("\nüìù Original Paragraph:")
    print(f"'{test_paragraph}'")
    print(f"Length: {original_length} characters")

    # Quick local preview of the cleaning step, independent of the agent run below
    cleaned = clean_paragraph_text(test_paragraph)
    cleaned_length = len(cleaned)
    print("\n‚ú® Cleaned Paragraph:")
    print(f"'{cleaned}'")
    print(f"Length: {cleaned_length} characters")
    print(f"Removed: {original_length - cleaned_length} characters")

    # Full analysis with agent (it receives the original, uncleaned paragraph)
    print("\nüîç Running full quality analysis...")
    print_report(await analyzer.analyze_paragraph_async(test_paragraph))

def print_report(report: Dict[str, Any]):
    """Write a human-readable rendering of an analysis report to stdout.

    A banner is always printed. If the report has an ``"error"`` key, only
    that error is shown. Otherwise the overall score is printed (``N/A``
    when absent), followed by each optional section that is present and
    non-empty: cleaning statistics (``original_vs_cleaned``), a preview of
    ``cleaned_text`` limited to 200 characters, per-category scores
    (``category_scores``) and the ``summary``. A closing divider ends the
    report.
    """
    divider = "=" * 60

    print("\n" + divider)
    print("üìä PARAGRAPH QUALITY ANALYSIS REPORT")
    print(divider)

    # Failed analyses carry only an error message; nothing else to render.
    if "error" in report:
        print(f"‚ùå Error: {report['error']}")
        return

    overall = report.get('overall_score', 'N/A')
    print(f"üèÜ Overall Quality Score: {overall}/10")

    # Cleaning statistics: (label, key, trailing unit) per output line.
    stats = report.get('original_vs_cleaned', {})
    if stats:
        print("\nüßπ Text Cleaning Results:")
        stat_rows = (
            ("Original length", 'original_length', " characters"),
            ("Cleaned length", 'cleaned_length', " characters"),
            ("Characters removed", 'characters_removed', ""),
        )
        for label, key, unit in stat_rows:
            print(f"   {label}: {stats.get(key, 0)}{unit}")

    # Preview of the cleaned text, with an ellipsis when it was cut short.
    cleaned = report.get('cleaned_text', '')
    if cleaned:
        print("\n‚ú® Cleaned Text:")
        ellipsis = '...' if len(cleaned) > 200 else ''
        print(f"   '{cleaned[:200]}{ellipsis}'")

    # Per-category scores, with snake_case keys shown as Title Case labels.
    per_category = report.get('category_scores', {})
    if per_category:
        print("\nüìä Category Scores:")
        for name, value in per_category.items():
            label = name.replace('_', ' ').title()
            print(f"   {label}: {value}/10")

    note = report.get('summary', '')
    if note:
        print("\nüí° Summary:")
        print(f"   {note}")

    print(divider)

# For standalone text cleaning function
def clean_text_simple(text: str) -> str:
    """Convenience entry point: return ``clean_paragraph_text(text)`` unchanged."""
    cleaned = clean_paragraph_text(text)
    return cleaned

if __name__ == "__main__":
    # Minimal usage example: clean a single string without building the agent.
    # The doubled backslashes put literal backslash characters into the sample
    # (it reads: This is a \"test\" with unwanted \n characters!), so the
    # cleaner is given escape *text*, not real quotes or newlines.
    sample_text = "This is a \\\"test\\\" with unwanted \\n characters!"
    cleaned = clean_text_simple(sample_text)
    print(f"Original: {sample_text}")
    print(f"Cleaned: {cleaned}")
    
    # Then run the full demo (quick clean + agent analysis).
    # NOTE: a bare top-level `await` is only valid where the host supplies a
    # running event loop (e.g. an IPython/Jupyter cell); in a plain `python`
    # script this line is a SyntaxError and `asyncio.run(main())` is needed.
    await main()


Original: This is a \"test\" with unwanted \n characters!
Cleaned: This is a test with unwanted characters!
üöÄ Initializing Paragraph Quality Agent...

üìù Original Paragraph:
'This is a "sample paragraph" with \n unwanted characters and \t formatting issues. 
    It contains some https://example.com/long-url-here and other artifacts like [brackets] and {braces}.
    The text also has    excessive   spacing and multiple....dots!!!'
Length: 257 characters

‚ú® Cleaned Paragraph:
'This is a sample paragraph with unwanted characters and formatting issues. It contains some and other artifacts like brackets and braces. The text also has excessive spacing and multiple...dots!'
Length: 194 characters
Removed: 63 characters

üîç Running full quality analysis...


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `CleanText` with `This is a "sample paragraph" with 
 unwanted characters and 	 formatting issues. 
    It contains some https://example.com/long-url-here and o