In [2]:
import requests
import os
from dotenv import load_dotenv

### Loading Key

In [3]:
# Let python-dotenv populate the process environment, then look up the
# YouTube Data API key. The lookup yields None when the variable is unset.
load_dotenv()
YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY")

In [4]:
# Tool for Extracting Video Details from YouTube

from langchain.tools import BaseTool

class YouTubeSearchTool(BaseTool):
    """Tool that searches YouTube and returns metadata for the matching videos.

    Each query triggers two YouTube Data API v3 requests: ``search`` to
    collect video ids, then ``videos`` to fetch the snippet of every id
    (the step the notebook uses to obtain the full descriptions).
    """

    name: str = "Youtube Search Tool"
    description: str = "Search YouTube videos for a given query and return top results with full descriptions"
    # Value sent as ``maxResults`` to the search endpoint.
    max_results: int = 50
    # Seconds to wait on each HTTP request; without a timeout ``requests``
    # may block indefinitely on an unresponsive server.
    request_timeout: float = 10.0

    def _run(self, query: str):
        """Search YouTube for ``query`` and return one record per video.

        Args:
            query: Free-text search phrase.

        Returns:
            A list of dicts with the keys ``title``, ``link``, ``channel``,
            ``published`` and ``description``; or the string
            ``"No videos found."`` when the search yields no video ids.

        Raises:
            requests.HTTPError: If either endpoint answers with an error
                status (for example an invalid key or exhausted quota).
                Previously such responses were indistinguishable from an
                empty search result.
        """
        api_key = YOUTUBE_API_KEY

        # Step 1: Search videos
        search_url = "https://www.googleapis.com/youtube/v3/search"
        search_params = {
            "part": "snippet",
            "q": query,
            "key": api_key,
            "type": "video",
            "maxResults": self.max_results
        }
        search_response = requests.get(
            search_url, params=search_params, timeout=self.request_timeout
        )
        search_response.raise_for_status()
        search_data = search_response.json()

        # Keep only items that actually carry a video id so a malformed or
        # non-video item cannot raise KeyError.
        video_ids = [
            item["id"]["videoId"]
            for item in search_data.get("items", [])
            if "videoId" in item.get("id", {})
        ]

        if not video_ids:
            return "No videos found."

        # Step 2: Fetch full descriptions
        videos_url = "https://www.googleapis.com/youtube/v3/videos"
        videos_params = {
            "part": "snippet",
            "id": ",".join(video_ids),
            "key": api_key
        }
        videos_response = requests.get(
            videos_url, params=videos_params, timeout=self.request_timeout
        )
        videos_response.raise_for_status()
        videos_data = videos_response.json()

        results = []
        for video in videos_data.get("items", []):
            snippet = video["snippet"]
            results.append({
                "title": snippet["title"],
                "link": f"https://www.youtube.com/watch?v={video['id']}",
                "channel": snippet["channelTitle"],
                "published": snippet["publishedAt"],
                "description": snippet["description"]
            })

        return results

### Different Types of Search Queries for Extraction

In [5]:
# Search phrases, from the generic category to budget-, premium- and
# region-specific variants.
(query, query2, query3, query4,
 query5, query6, query7, query8) = (
    "Smart Fans",
    "Smart Fans in India",
    "Smart Fans in budget",
    "Smart Fans in budget in India",
    "Premium Smart Fans",
    "Premium Smart Fans in India",
    "Best Smart Fans",
    "Best Smart Fans in India",
)

yt_tool = YouTubeSearchTool()

# One search per phrase, issued in the same order as the phrases above.
(result, result2, result3, result4,
 result5, result6, result7, result8) = [
    yt_tool._run(phrase)
    for phrase in (query, query2, query3, query4,
                   query5, query6, query7, query8)
]


In [6]:
# Tool for Calculating Share of Voice

from typing import List, Dict
import re
from collections import defaultdict

# Brand names whose mentions are tallied; matching is case-insensitive and
# restricted to whole words.
brands = ['Crompton','Havells','Usha','Orient Electric','Atomberg','Bajaj','Polycab','LONGWAY','Activa']

class ShareOfVoiceTool(BaseTool):
    """Tool that scores how prominently each brand appears in a set of videos.

    A brand earns ``title_weight`` points for every video whose title
    contains it and ``description_weight`` points for every video whose
    description contains it. A field counts at most once per video, however
    many times the brand is repeated inside it.
    """

    name: str = "share_of_voice"
    description: str = "Calculate the number of times each brand is mentioned in the video data"
    # Points awarded when a brand appears in a video title / description.
    title_weight: int = 2
    description_weight: int = 1

    def _run(self, videos: List[Dict]) -> Dict:
        """Compute weighted mention scores and their percentage shares.

        Args:
            videos: Iterable of dicts read through their ``title`` and
                ``description`` keys. ``None`` or a plain string (the search
                tool returns ``"No videos found."`` when it has no results)
                is treated as "no videos" instead of raising.

        Returns:
            ``{brand: {"mentions": score, "share_percent": pct}}`` for every
            brand that scored at least once, in order of first appearance,
            or ``{"message": "No brands were mentioned in the videos."}``
            when nothing matched.
        """
        if videos is None or isinstance(videos, str):
            return {"message": "No brands were mentioned in the videos."}

        # Build each whole-word, case-insensitive pattern once per call
        # instead of twice per brand per video.
        brand_patterns = [
            (brand, re.compile(rf"\b{re.escape(brand)}\b", re.IGNORECASE))
            for brand in brands
        ]

        mention_count = defaultdict(int)

        for video in videos:
            # ``or ''`` also covers keys that are present but set to None.
            title = video.get('title') or ''
            description = video.get('description') or ''
            for brand, pattern in brand_patterns:
                if pattern.search(title):
                    mention_count[brand] += self.title_weight
                if pattern.search(description):
                    mention_count[brand] += self.description_weight

        total_mentions = sum(mention_count.values())
        if total_mentions == 0:
            return {"message": "No brands were mentioned in the videos."}

        share_of_voice = {
            brand: {
                "mentions": count,
                "share_percent": round((count / total_mentions) * 100, 2)
            }
            for brand, count in mention_count.items()
        }

        return share_of_voice


In [7]:
shareofvoice_tool = ShareOfVoiceTool()

# Score every search result set, keeping the same order as the queries.
(sov_result, sov_result2, sov_result3, sov_result4,
 sov_result5, sov_result6, sov_result7, sov_result8) = [
    shareofvoice_tool._run(videos)
    for videos in (result, result2, result3, result4,
                   result5, result6, result7, result8)
]


### Combining Query and Fetched Result so that Agent can Understand the Context

In [8]:
# Tag each share-of-voice dict with the search phrase that produced it.
for _sov, _heading in zip(
    (sov_result, sov_result2, sov_result3, sov_result4,
     sov_result5, sov_result6, sov_result7, sov_result8),
    (query, query2, query3, query4,
     query5, query6, query7, query8),
):
    _sov["heading"] = _heading

### Pulling Local LLM

In [9]:
!ollama pull llama3.2

[?2026h[?25l[1Gpulling manifest ⠋ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠙ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠼ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠴ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠦ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠧ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠇ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠏ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠋ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠙ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠼ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠴ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠦ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠧ [K[?25h[?2026l[?2026h[?25l[1Gpulling ma

In [10]:
# Collect every tagged result in one list so a single object can be handed to the LLM

data = [
    sov_result,
    sov_result2,
    sov_result3,
    sov_result4,
    sov_result5,
    sov_result6,
    sov_result7,
    sov_result8,
]

### Generating Report for Atomberg Based on Results

In [18]:
from crewai import Agent, Task, Crew, LLM

# LLM handle for the locally served llama3.2 model (pulled via Ollama in an
# earlier cell); temperature is set to 0.
llm = LLM(model="ollama/llama3.2",temperature=0)

# Single agent that writes the report. It is given no tools, so everything it
# knows about the data comes from the task description below.
atomberg_agent = Agent(
    role="Market Analyst",
    goal="Analyze Atomberg's Share of Voice across multiple smart fan categories in India.",
    backstory=(
        "You are a skilled market research analyst who specializes in brand performance metrics. "
        "You excel at spotting trends, comparing categories, and delivering insights in clear, easy-to-read language."
    ),
    tools=[],
    llm = llm,
    verbose=True
)

# The task prompt is an f-string, so `data` is rendered into the text when
# this cell runs.
# NOTE(review): `{data}` is interpolated twice (inside the "USE NUMBERS"
# bullet and again under "Data:"), so the full dataset appears twice in the
# prompt — confirm this is intended.
# NOTE(review): the prompt asks for '###' headings and also for plain text
# without formatting; the later PDF cell parses the output as Markdown.
atomberg_task = Task(
    description=f"""
You are given Share of Voice datasets for multiple smart fan categories in India.
Each dataset is a dictionary where:
- "heading" is the category name
- Other keys are brand names
- Each brand has "mentions" and "share_percent"

Your job:
1. For each dataset, extract Atomberg's mentions and share_percent.
2. Compare Atomberg's performance across all categories.
3. For each category, analyze:
   - Atomberg's raw mentions
   - Atomberg's share_percent (SOV)
   - Interpretation: what this implies about its market share in that category.
4. Identify:
   - Categories where Atomberg leads strongly
   - Categories where Atomberg is competitive
   - Categories where Atomberg is weak
5. Write a structured report with:

### Executive Summary
A concise overview of Atomberg's market presence and SOV across all categories.

### Category Breakdown
For each category:
- State the category name
- State Atomberg's mentions and share_percent in plain text
- Provide a short interpretation of what these figures suggest about market dominance or weakness (**USE NUMBERS AS IT IS FROM {data}**)
- Use bullet points or short sentences — **do not use tables or Markdown table syntax**

### Market Share & SOV Trends
- Key strengths and leadership areas
- Areas for potential growth
- Notable changes across categories

Make the output clean and readable in plain text so that it is suitable for inclusion in a PDF without formatting issues.

Data:
{data}
""",
    agent=atomberg_agent,
    expected_output="A detailed comparative market analysis report about Atomberg's Share of Voice and market share across all categories."
)

# One agent, one task.
crew = Crew(
    agents=[atomberg_agent],
    tasks=[atomberg_task],
    verbose=True
)

# Run the crew and keep its output for the print / PDF cells that follow.
# NOTE(review): this rebinds `result`, which earlier held the video list
# returned for the first search query; re-running the share-of-voice cell
# after this one would pass it the crew output instead.
result = crew.kickoff()

In [12]:
print(result)

**Executive Summary**

Atomberg's market presence is strong across multiple smart fan categories in India. The company leads strongly in some categories, while being competitive in others. Atomberg's raw mentions are highest in the "Premium Smart Fans" category, indicating its dominance in this segment.

**Category Breakdown**

* **Smart Fans**
	+ Atomberg: 33 mentions, 56.9% share_percent
	+ Interpretation: Atomberg is a leading brand in the smart fan market, with a significant share of voice.
* **Smart Fans in India**
	+ Atomberg: 67 mentions, 42.41% share_percent
	+ Interpretation: Atomberg's performance in this category suggests it has a strong presence in the Indian market for smart fans.
* **Smart Fans in budget**
	+ Atomberg: 53 mentions, 38.69% share_percent
	+ Interpretation: Atomberg is a leading brand in the budget segment of the smart fan market, indicating its affordability and value proposition.
* **Premium Smart Fans**
	+ Atomberg: 59 mentions, 38.56% share_percent
	+ In

### Converting and Saving PDF

In [13]:
import markdown
from weasyprint import HTML

# Raw text of the CrewAI result (LLM output with Markdown formatting)
markdown_text = result.raw

# Markdown -> HTML fragment
html_content = markdown.markdown(markdown_text)

# Wrap the fragment in a minimal page with a small stylesheet. The leading
# and trailing empty entries keep the newline that opens and closes the page.
html_with_styles = "\n".join([
    "",
    "<html>",
    "<head>",
    "<style>",
    "body { font-family: Arial, sans-serif; line-height: 1.6; }",
    "h1, h2, h3 { color: #2c3e50; }",
    "strong { font-weight: bold; }",
    "em { font-style: italic; }",
    "ul { margin: 0; padding-left: 20px; }",
    "li { margin-bottom: 5px; }",
    "</style>",
    "</head>",
    "<body>",
    html_content,
    "</body>",
    "</html>",
    "",
])

# Render the page to a PDF file in the working directory
report_page = HTML(string=html_with_styles)
report_page.write_pdf("atomberg_report.pdf")

print("PDF saved as atomberg_report.pdf with proper formatting!")

PDF saved as atomberg_report.pdf with proper formatting!


### Converting Dicts into DataFrames for Visualizations

In [14]:
import pandas as pd

# One frame per query: dict keys become columns, and each brand's inner
# "mentions" / "share_percent" keys become the rows.
df, df2, df3, df4, df5, df6, df7, df8 = map(
    pd.DataFrame.from_dict,
    (sov_result, sov_result2, sov_result3, sov_result4,
     sov_result5, sov_result6, sov_result7, sov_result8),
)

def table(df):
    """Reshape a share-of-voice frame into one row per company.

    Args:
        df: Frame with one column per company (plus an optional 'heading'
            column) and one row per metric.

    Returns:
        A new frame with a 'Company' column followed by one column per
        metric, without the 'heading' row, indexed 0..n-1. Metric columns
        are returned with numeric dtypes whenever their values allow it.
    """
    tidy = (
        df.T
        .reset_index()
        .rename(columns={'index': 'Company'})
    )
    tidy = tidy[tidy['Company'] != 'heading'].reset_index(drop=True)
    # Transposing a frame that mixes numeric columns with the string
    # 'heading' column leaves every column as dtype object, which breaks
    # plotting and aggregation. With the 'heading' row gone the metric
    # columns are purely numeric again, so let pandas re-infer their dtypes.
    return tidy.infer_objects()

# Reshape every frame into the one-row-per-company layout. The input tuple is
# built before any name is rebound, so each frame is transformed exactly once.
df, df2, df3, df4, df5, df6, df7, df8 = map(
    table,
    (df, df2, df3, df4, df5, df6, df7, df8),
)

# Saving DataFrames: one workbook per table, named after its search phrase.

for _frame, _workbook in (
    (df, 'Smart Fans.xlsx'),
    (df2, 'Smart Fans in India.xlsx'),
    (df3, 'Smart Fans in budget.xlsx'),
    (df4, 'Smart Fans in budget in India.xlsx'),
    (df5, 'Premium Smart Fans.xlsx'),
    (df6, 'Premium Smart Fans in India.xlsx'),
    (df7, 'Best Smart Fans.xlsx'),
    (df8, 'Best Smart Fans in India.xlsx'),
):
    _frame.to_excel(_workbook, index=False)

In [17]:
df5

Unnamed: 0,Company,mentions,share_percent
0,Crompton,17.0,13.39
1,Havells,23.0,18.11
2,Orient Electric,10.0,7.87
3,Atomberg,61.0,48.03
4,Polycab,4.0,3.15
5,Activa,5.0,3.94
6,Usha,3.0,2.36
7,Bajaj,2.0,1.57
8,LONGWAY,2.0,1.57
