In [8]:
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_groq import ChatGroq
from langchain_community.tools.tavily_search import TavilySearchResults
from preprocessing import load_transcript, process_transcript
import os
import logging
from dotenv import load_dotenv

# Logging setup: records at INFO and above are sent both to a log file under
# ./logs and to a stream handler.
os.makedirs("logs", exist_ok=True)
_log_file_handler = logging.FileHandler("logs/blog_generator.log")
_log_stream_handler = logging.StreamHandler()
logging.basicConfig(
    handlers=[_log_file_handler, _log_stream_handler],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-level logger used by every step below.
logger = logging.getLogger(__name__)

# Load environment variables
load_dotenv()

# Verify API keys: both services need credentials, so report exactly which
# variable(s) are missing instead of a generic "one or the other" message.
_missing_keys = [
    key for key in ("GROQ_API_KEY", "TAVILY_API_KEY") if not os.getenv(key)
]
if _missing_keys:
    logger.error("%s not set in .env file", " and ".join(_missing_keys))
    # `exit` is a helper the `site` module injects for interactive sessions;
    # raising SystemExit is the explicit way to stop with a status code.
    raise SystemExit(1)

# Initialize the language model (a Llama 4 Scout model accessed through the
# Groq chat wrapper; temperature 0 is requested).
try:
    llm = ChatGroq(temperature=0, model_name="meta-llama/llama-4-scout-17b-16e-instruct")
except Exception as e:
    # The provider is Groq (not "Grok"); keep the log message accurate.
    logger.error(f"Failed to initialize Groq model: {e}")
    # `exit` is a helper the `site` module injects for interactive sessions;
    # raising SystemExit is the explicit way to stop with a status code.
    raise SystemExit(1)
else:
    logger.info("Groq model initialized successfully")

# Preprocess the audio file: transcribe it, then clean the transcript.
audio_file = "audio/Clean code challenge - Silicon Valley Season 5, Ep6.mp3"
try:
    transcript = load_transcript(model_type="base", file_path=audio_file, language="english")
    # NOTE(review): the recorded run log prints "Processing transcript..."
    # twice, which suggests load_transcript may already run the same
    # processing step -- confirm whether this second pass is needed.
    cleaned_transcript = process_transcript(transcript)
    transcript = cleaned_transcript
    logger.info("Transcript processed successfully")
except Exception as e:
    logger.error(f"Error during transcription: {e}")
    # `exit` is a helper the `site` module injects for interactive sessions;
    # raising SystemExit is the explicit way to stop with a status code.
    raise SystemExit(1)

# Chunk transcript if too long
def chunk_transcript(transcript, max_length=10000):
    """Split *transcript* into consecutive slices of at most *max_length* items.

    Args:
        transcript: The text (or any other sliceable sequence with a length)
            to split.
        max_length: Maximum size of each chunk. Must be a positive integer.

    Returns:
        A list of slices in their original order. The last slice may be
        shorter than *max_length*; an empty transcript yields an empty list.

    Raises:
        ValueError: If *max_length* is zero or negative. Without this check a
            negative value would silently produce an empty list (dropping the
            whole transcript) and zero would fail inside ``range``.
    """
    if max_length <= 0:
        raise ValueError(f"max_length must be a positive integer, got {max_length!r}")
    return [
        transcript[start:start + max_length]
        for start in range(0, len(transcript), max_length)
    ]

transcript_chunks = chunk_transcript(transcript)
if len(transcript_chunks) > 1:
    # Only the first chunk is passed on, so the warning must say that the
    # rest of the transcript will not be reflected in the generated post.
    discarded_length = len(transcript) - len(transcript_chunks[0])
    logger.warning(
        f"Transcript split into {len(transcript_chunks)} chunks due to length; "
        f"using only the first chunk and discarding the remaining {discarded_length} characters"
    )
    transcript = transcript_chunks[0]  # Use first chunk for simplicity

# Tools available to the agent: a single Tavily search tool (max_results=5).
_tavily_search = TavilySearchResults(max_results=5)
tools = [_tavily_search]
logger.info("Tools initialized")

# Prompt layout: fixed system instructions, then the caller's text under the
# {input} variable (not {transcript}), then the agent scratchpad messages.
_system_instructions = """
You are a helpful assistant that generates blog posts based on provided transcripts. The blog post MUST:
- Be informative, engaging, and well-structured with an introduction, 2-3 subheadings, bullet points where relevant, and a conclusion.
- Include at least one hyperlink to a credible source from Tavily Search results to support claims or add depth.
- Be written in a professional yet approachable tone, suitable for software developers and tech enthusiasts.
Instructions:
1. Extract key themes from the transcript provided in the input and craft a narrative.
2. Use Tavily Search to find relevant information (e.g., on coding culture or competition).
3. Explicitly cite at least one search result with a hyperlink in the blog post.
4. Structure the blog post clearly with subheadings and bullet points for key insights or recommendations.
    """
_prompt_messages = [
    ("system", _system_instructions),
    # The human turn uses {input} so it matches the key passed to invoke().
    ("human", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
]
prompt = ChatPromptTemplate.from_messages(_prompt_messages)

# Combine the model, the tool list and the prompt into a tool-calling agent.
agent = create_tool_calling_agent(llm, tools, prompt)
logger.info("Agent created")

# Wrap the agent in an executor; verbose=True is requested so the run is
# traced while it executes.
_executor_options = {"agent": agent, "tools": tools, "verbose": True}
agent_executor = AgentExecutor(**_executor_options)
logger.info("Agent executor initialized")

# Invoke the agent with the transcript embedded in the single "input" value.
try:
    blog_result = agent_executor.invoke({
        "input": f"Generate a blog post based on this transcript: {transcript}"
    })
except Exception as e:
    logger.error(f"Error during agent execution: {e}")
    # `exit` is a helper the `site` module injects for interactive sessions;
    # raising SystemExit is the explicit way to stop with a status code.
    raise SystemExit(1)
else:
    logger.info("Blog post generated successfully")

# Save the blog post
output_file = "blog_post.md"
try:
    # Pin UTF-8 explicitly: the generated text can contain non-ASCII
    # characters (e.g. curly quotes), and the default encoding of open()
    # depends on the platform locale.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(blog_result["output"])
    logger.info(f"Blog post saved to {output_file}")
except Exception as e:
    # Best effort: a failed save is logged but does not stop the session.
    logger.error(f"Error saving blog post: {e}")

2025-04-27 08:31:00,445 - INFO - Grok model initialized successfully
2025-04-27 08:31:00,446 - INFO - Loading transcript from 'audio/Clean code challenge - Silicon Valley Season 5, Ep6.mp3' using model 'base'. Specified language: english


2025-04-27 08:31:01,236 - INFO - Whisper model 'base' loaded.
2025-04-27 08:31:01,237 - INFO - Mapped 'english' to Whisper code 'en'
2025-04-27 08:31:01,237 - INFO - Using specified language code for transcription: en
2025-04-27 08:31:01,238 - INFO - Starting transcription for 'audio/Clean code challenge - Silicon Valley Season 5, Ep6.mp3'...
2025-04-27 08:31:51,392 - INFO - Transcription successful. Detected language: en
2025-04-27 08:31:51,392 - INFO - Processing transcript...
2025-04-27 08:31:51,393 - INFO - Transcript processing complete.
2025-04-27 08:31:51,402 - INFO - Processing transcript...
2025-04-27 08:31:51,403 - INFO - Transcript processing complete.
2025-04-27 08:31:51,404 - INFO - Transcript processed successfully
2025-04-27 08:31:51,405 - INFO - Tools initialized
2025-04-27 08:31:51,409 - INFO - Agent created
2025-04-27 08:31:51,410 - INFO - Agent executor initialized




> Entering new AgentExecutor chain...


2025-04-27 08:31:51,713 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


[32;1m[1;3m
Invoking: `tavily_search_results_json` with `{'query': 'coding culture competition blog post'}`


[0m[36;1m[1;3m[{'title': '2^5 Coding Competition 2018 | Engineering Health', 'url': 'https://engineering.cerner.com/blog/2-to-the-5th-coding-competition-2018/', 'content': 'December 14, 2018\n\n2^5 Coding Competition 2018\n\nJenny Saqiurila\n\nAt Cerner, associates are always encouraged to develop tools and methods to improve their productivity. With this year’s theme being “Engineering Productivity,” Cerner’s fourth annual 2^5 Coding Competition provided associates another opportunity to dive into this subject and get involved in Cerner’s development culture. [...] As the 2^5 competition runs for 32 days, the maximum number of languages that could be used by a participant is 32 languages. Susmita was named this category winner because she submitted all 32 days worth of code and used 32 different languages.\n\nShipIt XIII\n\nNovember 16, 2018\n\nOur DevCulture Team: Connec

2025-04-27 08:31:57,246 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-27 08:31:58,353 - INFO - Blog post generated successfully
2025-04-27 08:31:58,354 - INFO - Blog post saved to blog_post.md


[32;1m[1;3mThe Cutthroat World of Coding Competitions: A Lesson in Humility and Resilience

In the world of software development, coding competitions have become a staple of engineering culture. These events bring together talented engineers to showcase their skills, creativity, and problem-solving abilities under pressure. But beyond the thrill of competition, coding competitions offer valuable lessons in humility, resilience, and teamwork.

### The Dark Side of Coding Competitions

Coding competitions can bring out the best and worst in people. The transcript provided earlier illustrates a toxic example of how competition can lead to trash-talking, bullying, and unprofessional behavior. This kind of behavior is unacceptable and can create a hostile work environment.

### Building a Positive Coding Culture

To avoid such negativity, it's essential to foster a positive coding culture that emphasizes collaboration, mutual respect, and constructive feedback. Here are some key takeaways

In [10]:
# Keep a separate copy of the agent result for the follow-up cells
# (`.copy()` -- presumably a shallow dict copy; confirm the result type).
# NOTE(review): the name is spelled `blog_conten` (sic). The SEO cell reads
# `blog_conten["output"]` under this exact spelling, so rename both together.
blog_conten = blog_result.copy()

In [17]:
def generate_seo_elements(blog_content):
    """Ask the model for SEO metadata describing a blog post.

    The prompt requests a title, a meta description, tags and keywords, and
    is piped into the module-level ``llm``.

    Args:
        blog_content: Text of the blog post, substituted for
            ``{blog_content}`` in the prompt.

    Returns:
        The result of invoking the prompt-to-model chain (an ``AIMessage`` in
        the recorded run of this notebook).
    """
    template_text = """
        Given this blog post, suggest:
        - An SEO-friendly title (under 60 characters).
        - A meta description (under 160 characters).
        - 5-10 tags.
        - 5-7 keywords.
        Blog post: {blog_content}
        """
    prompt_template = ChatPromptTemplate.from_messages([("human", template_text)])
    seo_chain = prompt_template | llm
    response = seo_chain.invoke({"blog_content": blog_content})
    return response

# Generate SEO elements from the blog text held in `blog_conten`.
try:
    seo_elements = generate_seo_elements(blog_conten["output"])
except Exception as e:
    logger.error(f"Error generating SEO elements: {e}")
    # `exit` is a helper the `site` module injects for interactive sessions;
    # raising SystemExit is the explicit way to stop with a status code.
    raise SystemExit(1)
else:
    logger.info("SEO elements generated successfully")

# Show the serialized form of the response as the cell's result.
# NOTE(review): `json` is imported but not used by this cell; the import is
# kept in case later cells rely on it.
import json
seo_elements.to_json()

2025-04-27 08:46:51,383 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-27 08:46:51,385 - INFO - SEO elements generated successfully


{'lc': 1,
 'type': 'constructor',
 'id': ['langchain', 'schema', 'messages', 'AIMessage'],
 'kwargs': {'content': 'Here are the suggested SEO-friendly title, meta description, tags, and keywords:\n\n**SEO-friendly Title (under 60 characters)**\n"Coding Competitions: Humility & Resilience"\n\n**Meta Description (under 160 characters)**\n"Learn how coding competitions can improve skills & foster community, but also require humility & resilience. Discover best practices for a positive coding culture."\n\n**Tags (5-10)**\n* Coding competitions\n* Software development\n* Engineering culture\n* Problem-solving skills\n* Teamwork and collaboration\n* Code quality\n* Resilience in tech\n* Humility in coding\n* Positive coding culture\n\n**Keywords (5-7)**\n* Coding competitions\n* Software development\n* Problem-solving skills\n* Code quality\n* Resilience\n* Humility\n* Teamwork\n\nNote that I\'ve focused on using relevant and specific keywords that accurately reflect the content of the blog 

In [None]:
def generate_faq(transcript):
    """Ask the model to turn a transcript into a short FAQ.

    The prompt requests 3-5 potential questions with concise answers and is
    piped into the module-level ``llm``.

    Args:
        transcript: Transcript text, substituted for ``{transcript}`` in the
            prompt.

    Returns:
        Whatever invoking the prompt-to-model chain returns -- presumably the
        model's reply message; confirm against the LLM wrapper in use.
    """
    template_text = """
        Identify 3-5 potential questions from the transcript and provide concise answers in an FAQ format.
        Transcript: {transcript}
        """
    human_message = ("human", template_text)
    faq_chain = ChatPromptTemplate.from_messages([human_message]) | llm
    return faq_chain.invoke({"transcript": transcript})

In [None]:
def generate_social_media(blog_content):
    """Ask the model for social-media copy based on a blog post.

    The prompt requests one Twitter post, one LinkedIn post and one Instagram
    caption, and is piped into the module-level ``llm``.

    Args:
        blog_content: Text of the blog post, substituted for
            ``{blog_content}`` in the prompt.

    Returns:
        Whatever invoking the prompt-to-model chain returns -- presumably the
        model's reply message; confirm against the LLM wrapper in use.
    """
    template_text = """
        Create:
        - 1 Twitter post (<280 characters)
        - 1 LinkedIn post (200-300 words)
        - 1 Instagram caption (50-100 words)
        Based on this blog post: {blog_content}
        """
    prompt_template = ChatPromptTemplate.from_messages([("human", template_text)])
    social_chain = prompt_template | llm
    inputs = {"blog_content": blog_content}
    return social_chain.invoke(inputs)

In [None]:
def generate_newsletter(blog_content):
    """Ask the model for a 100-150 word newsletter summary of a blog post.

    Args:
        blog_content: Text of the blog post, substituted for
            ``{blog_content}`` in the prompt.

    Returns:
        Whatever invoking the prompt-to-model chain returns -- presumably the
        model's reply message; confirm against the LLM wrapper in use.
    """
    human_message = (
        "human",
        "Create a 100-150 word summary of this blog post for a newsletter: {blog_content}",
    )
    summary_chain = ChatPromptTemplate.from_messages([human_message]) | llm
    return summary_chain.invoke({"blog_content": blog_content})

In [None]:
def extract_quotes(transcript):
    """Ask the model to pull 3-5 memorable quotes out of a transcript.

    Args:
        transcript: Transcript text, substituted for ``{transcript}`` in the
            prompt.

    Returns:
        Whatever invoking the prompt-to-model chain returns -- presumably the
        model's reply message; confirm against the LLM wrapper in use.
    """
    template_text = "Extract 3-5 memorable quotes from this transcript: {transcript}"
    prompt_template = ChatPromptTemplate.from_messages([("human", template_text)])
    quotes_chain = prompt_template | llm
    result = quotes_chain.invoke({"transcript": transcript})
    return result