In [None]:
# This uses hugging face modesl
!pip install smolagents arxiv pypdf networkx matplotlib

Collecting smolagents
  Downloading smolagents-1.5.0-py3-none-any.whl.metadata (9.7 kB)
Collecting arxiv
  Downloading arxiv-2.1.3-py3-none-any.whl.metadata (6.1 kB)
Collecting pypdf
  Downloading pypdf-5.1.0-py3-none-any.whl.metadata (7.2 kB)
Collecting pandas>=2.2.3 (from smolagents)
  Downloading pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
Collecting markdownify>=0.14.1 (from smolagents)
  Downloading markdownify-0.14.1-py3-none-any.whl.metadata (8.5 kB)
Collecting duckduckgo-search>=6.3.7 (from smolagents)
  Downloading duckduckgo_search-7.2.1-py3-none-any.whl.metadata (17 kB)
Collecting feedparser~=6.0.10 (from arxiv)
  Downloading feedparser-6.0.11-py3-none-any.whl.metadata (2.4 kB)
Collecting primp>=0.10.0 (from duckduckgo-search>=6.3.7->smolagents)
  Downloading primp-0.10.1-cp38-abi3-manylinux_2_17_x86_64.manylinux

In [None]:
import os
import re
from collections import Counter
from datetime import datetime
from typing import List, Dict, Any

import arxiv
import matplotlib.pyplot as plt
import networkx as nx
from pypdf import PdfReader
from smolagents import tool, CodeAgent, HfApiModel

@tool
def search_papers(query: str, max_results: int = 10) -> List[Dict[str, Any]]:
    """
    Looks up papers on ArXiv that match a topic

    Args:
        query: Free-text search string describing the topic to look for
        max_results: Upper bound on the number of papers fetched from ArXiv

    Returns:
        One dictionary per paper holding its title, authors, publication year,
        abstract, ArXiv entry id, PDF URL and categories
    """
    request = arxiv.Search(query=query, max_results=max_results)
    hits = arxiv.Client().results(request)

    # Flatten every arxiv result object into a plain dictionary.
    return [
        {
            'title': hit.title,
            'authors': [str(person) for person in hit.authors],
            'year': hit.published.year,
            'abstract': hit.summary,
            'id': hit.entry_id,
            'pdf_url': hit.pdf_url,
            'categories': hit.categories,
        }
        for hit in hits
    ]

@tool
def analyze_business_impact(papers: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Analyzes the business implications from research papers

    Scans the title and abstract of every paper for keyword-led sentence fragments
    and files the paper under an adoption horizon based on maturity cue words.

    Args:
        papers: List of paper dictionaries whose 'title' and 'abstract' entries are read

    Returns:
        Dictionary containing business impact analysis. 'potential_applications',
        'technical_challenges' and 'business_opportunities' hold lower-cased
        fragments running from a keyword to the next period. 'market_trends' is
        always left empty by this function. 'adoption_timeline' lists paper titles
        under 'short_term', 'medium_term' or 'long_term'.
    """
    # Result key -> keywords whose trailing sentence fragment is collected under it.
    keyword_groups = {
        'potential_applications': ['application', 'industry', 'commercial', 'product', 'market', 'business'],
        'technical_challenges': ['challenge', 'limitation', 'problem', 'issue', 'barrier'],
        'business_opportunities': ['opportunity', 'potential', 'promising', 'future', 'improvement'],
    }
    # Adoption bucket -> cue words, checked in this order; the first bucket that hits wins.
    timeline_cues = [
        ('short_term', ['implemented', 'deployed', 'available', 'production']),
        ('medium_term', ['prototype', 'experimental', 'proposed']),
    ]

    analysis = {
        'potential_applications': [],
        'technical_challenges': [],
        'business_opportunities': [],
        'market_trends': [],
        'adoption_timeline': {
            'short_term': [],
            'medium_term': [],
            'long_term': []
        }
    }

    for paper in papers:
        # '.' in the patterns below does not match a newline, so a sentence that
        # wraps across lines would never match. Collapse all whitespace first.
        text_lower = ' '.join(f"{paper['title']} {paper['abstract']}".split()).lower()

        for result_key, keywords in keyword_groups.items():
            for keyword in keywords:
                # Shortest run from the keyword up to and including the next period.
                pattern = re.escape(keyword) + r".*?\."
                analysis[result_key].extend(re.findall(pattern, text_lower))

        # Papers with no maturity cue at all default to the long-term bucket.
        bucket = next(
            (name for name, cues in timeline_cues
             if any(cue in text_lower for cue in cues)),
            'long_term',
        )
        analysis['adoption_timeline'][bucket].append(paper['title'])

    return analysis

@tool
def create_market_analysis(papers: List[Dict[str, Any]], business_impact: Dict[str, Any]) -> str:
    """
    Generates a market analysis report based on research papers

    Args:
        papers: List of paper dictionaries whose 'title', 'authors', 'year' and 'abstract' entries are read
        business_impact: Dictionary containing business impact analysis with the keys produced by analyze_business_impact

    Returns:
        Formatted market analysis report in markdown, with one '## ' heading per
        top-level section
    """
    trend_keywords = ['trend', 'advancement', 'improvement', 'development', 'progress']

    def one_line(text):
        # Collapse line breaks and runs of spaces so the value stays on one markdown line.
        return ' '.join(text.split())

    def unique_bullets(items):
        # De-duplicate in first-seen order. Iterating a set of strings yields a
        # different order from run to run, which made the report non-reproducible.
        return [f"- {item.capitalize()}\n" for item in dict.fromkeys(items)]

    parts = ["# Market Analysis Report\n\n"]

    # Overview
    parts.append("## Market Overview\n")
    if papers:
        years = [paper['year'] for paper in papers]
        parts.append(f"Analysis based on {len(papers)} research papers ")
        parts.append(f"published between {min(years)} and {max(years)}.\n\n")
    else:
        # min()/max() raise ValueError on an empty sequence, e.g. a search with no hits.
        parts.append("Analysis based on 0 research papers.\n\n")

    # Technology Trends, newest paper first
    parts.append("## Technology Trends\n")
    for paper in sorted(papers, key=lambda p: p['year'], reverse=True):
        parts.append(f"### {paper['year']}: {one_line(paper['title'])}\n")
        parts.append("Key Points:\n")
        parts.append(f"- Authors: {', '.join(paper['authors'])}\n")
        # Keep only abstract sentences that mention a trend-related keyword.
        for sentence in paper['abstract'].split('.'):
            if any(keyword in sentence.lower() for keyword in trend_keywords):
                parts.append(f"- {one_line(sentence)}.\n")
        parts.append("\n")

    # Market Applications
    parts.append("## Potential Market Applications\n")
    parts.extend(unique_bullets(business_impact['potential_applications']))

    # Implementation Timeline (titles are listed as given, without de-duplication)
    parts.append("\n## Technology Adoption Timeline\n")
    timeline = business_impact['adoption_timeline']
    horizons = (
        ("Short-term Opportunities (0-12 months)", 'short_term'),
        ("Medium-term Developments (1-3 years)", 'medium_term'),
        ("Long-term Possibilities (3+ years)", 'long_term'),
    )
    for heading, key in horizons:
        parts.append(f"\n### {heading}\n")
        parts.extend(f"- {tech}\n" for tech in timeline[key])

    # Challenges and Risks
    parts.append("\n## Technical Challenges and Business Risks\n")
    parts.extend(unique_bullets(business_impact['technical_challenges']))

    # Business Opportunities
    parts.append("\n## Business Opportunities\n")
    parts.extend(unique_bullets(business_impact['business_opportunities']))

    return ''.join(parts)

@tool
def generate_executive_summary(market_analysis: str) -> str:
    """
    Creates an executive summary from the market analysis

    Args:
        market_analysis: Full market analysis report in markdown whose top-level sections start with '## '

    Returns:
        Executive summary in a business-friendly format, made of the overview text
        plus at most three bullets each for trends, opportunities and risks
    """
    # Each element after the first starts with a section title (the '## ' is consumed).
    sections = market_analysis.split('\n## ')

    def section_lines(title_prefix):
        # Body lines of the first section whose title starts with title_prefix.
        section = next((s for s in sections if s.startswith(title_prefix)), '')
        return section.split('\n')[1:]

    def top_bullets(title_prefix, limit=3, skip_prefixes=()):
        # Only lines that begin with '- ' count as bullets, so a hyphen in the
        # middle of a sentence is not mistaken for one.
        found = [
            line for line in section_lines(title_prefix)
            if line.startswith('- ') and not line.startswith(skip_prefixes)
        ]
        return found[:limit]

    summary = "# Executive Summary\n\n"

    # Overview: the text lives in the 'Market Overview' section. The chunk before
    # the first '## ' holds only the report title, so slicing it gave an empty overview.
    overview = [line.strip() for line in section_lines('Market Overview') if line.strip()]
    summary += "## Overview\n"
    summary += '\n'.join(overview) + "\n\n"

    # Key Findings
    summary += "## Key Findings\n\n"

    # Technology trends: skip the per-paper author bullets, which are not trends.
    summary += "### Technology Trends\n"
    for trend in top_bullets('Technology Trends', skip_prefixes=('- Authors:',)):
        summary += f"{trend}\n"

    # Market Opportunities
    summary += "\n### Market Opportunities\n"
    for opp in top_bullets('Business Opportunities'):
        summary += f"{opp}\n"

    # Key Risks
    summary += "\n### Key Risks and Challenges\n"
    for risk in top_bullets('Technical Challenges'):
        summary += f"{risk}\n"

    return summary

def run_business_analysis(query: str, max_papers: int = 10, hf_token: str = None):
    """
    Main function to run the research-to-business analysis

    Builds a CodeAgent over the four paper-analysis tools and asks it to search,
    analyze, report and summarize for the given topic.

    Args:
        query: Search query string
        max_papers: Maximum number of papers to analyze
        hf_token: Hugging Face API token

    Returns:
        Whatever the agent's run() call returns

    Raises:
        ValueError: If hf_token is missing or empty
    """
    if not hf_token:
        raise ValueError("Please provide a Hugging Face API token")

    model = HfApiModel(
        model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
        # Pass the caller's token through. A placeholder literal used to be sent
        # here, so the validated hf_token argument was never actually used.
        token=hf_token
    )

    agent = CodeAgent(
        tools=[search_papers, analyze_business_impact,
               create_market_analysis, generate_executive_summary],
        model=model,
        add_base_tools=True
    )

    result = agent.run(
        f"""Analyze the business potential of '{query}' by:
        1. Searching for up to {max_papers} relevant research papers
        2. Analyzing business impact and market potential
        3. Creating a detailed market analysis
        4. Generating an executive summary
        Please provide updates at each step."""
    )

    return result

# Example usage
if __name__ == "__main__":
    # Smoke-test the four tools by calling them directly, in pipeline order, without
    # building an agent (so no Hugging Face token is needed here).
    # Any exception raised along the way is caught at the bottom and reported as a
    # one-line message instead of a traceback.
    try:
        # Test paper search (limited to three results)
        papers = search_papers("autonomous vehicles business applications", max_results=3)
        print("Found papers:", len(papers))

        # Test business impact analysis; only the result keys are printed
        impact = analyze_business_impact(papers)
        print("\nBusiness Impact Analysis:", impact.keys())

        # Test market analysis generation; only the report length is printed
        market_report = create_market_analysis(papers, impact)
        print("\nMarket Analysis Report Length:", len(market_report))

        # Test executive summary built from the report above
        summary = generate_executive_summary(market_report)
        print("\nExecutive Summary Length:", len(summary))

    except Exception as e:
        print(f"Error during testing: {str(e)}")

Found papers: 3

Business Impact Analysis: dict_keys(['potential_applications', 'technical_challenges', 'business_opportunities', 'market_trends', 'adoption_timeline'])

Market Analysis Report Length: 1259

Executive Summary Length: 326


In [None]:
# Read the Hugging Face token from the HF_TOKEN environment variable instead of
# writing it into the notebook. If the variable is unset, hf_token is None and
# run_business_analysis raises ValueError asking for a token.
hf_token = os.environ.get("HF_TOKEN")

# Run the full analysis
analysis = run_business_analysis(
    query="autonomous vehicles business applications",
    max_papers=5,
    hf_token=hf_token
)

In [None]:
from smolagents import tool, CodeAgent, HfApiModel
import arxiv
from typing import List, Dict, Any
from collections import Counter
import re
from datetime import datetime

@tool
def search_papers(query: str, max_results: int = 10) -> List[Dict[str, Any]]:
    """
    Looks up business-relevant academic papers on ArXiv

    Args:
        query: Search query string
        max_results: Upper bound on the number of papers fetched

    Returns:
        One dictionary per paper holding its title, authors, publication year,
        abstract, ArXiv entry id and categories
    """
    # Narrow the caller's query to papers that also mention a business-related term.
    business_query = f"{query} AND (industry OR commercial OR business OR market OR application)"
    request = arxiv.Search(query=business_query, max_results=max_results)
    hits = arxiv.Client().results(request)

    # Flatten every arxiv result object into a plain dictionary.
    return [
        {
            'title': hit.title,
            'authors': [str(person) for person in hit.authors],
            'year': hit.published.year,
            'abstract': hit.summary,
            'id': hit.entry_id,
            'categories': hit.categories,
        }
        for hit in hits
    ]

@tool
def analyze_market_potential(papers: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Analyzes market potential and business implications

    Scans the title and abstract of every paper for keyword-led sentence fragments
    and files the paper under a maturity stage based on cue words.

    Args:
        papers: List of research papers whose 'title' and 'abstract' entries are read

    Returns:
        Comprehensive market analysis. 'market_segments', 'business_models',
        'competitive_advantages', 'market_drivers' and 'market_barriers' hold
        lower-cased fragments running from a keyword up to the next sentence
        terminator. 'revenue_streams' and 'key_players' are always left empty by
        this function. 'maturity_assessment' lists paper titles under
        'market_ready', 'prototype_stage' or 'research_stage'.
    """
    analysis = {
        'market_segments': [],
        'business_models': [],
        'revenue_streams': [],
        'key_players': [],
        'competitive_advantages': [],
        'market_drivers': [],
        'market_barriers': [],
        'maturity_assessment': {
            'research_stage': [],
            'prototype_stage': [],
            'market_ready': []
        }
    }

    # (result key, keywords): the fragment following each keyword is collected
    # under the result key.
    keyword_groups = [
        ('market_segments', ['segment', 'sector', 'industry', 'vertical', 'market']),
        ('business_models', ['business model', 'monetization', 'revenue', 'commercial']),
        ('competitive_advantages', ['advantage', 'unique', 'superior', 'better', 'improve']),
        ('market_drivers', ['driver', 'trend', 'growth', 'demand', 'need']),
        ('market_barriers', ['barrier', 'challenge', 'limitation', 'constraint', 'problem']),
    ]
    # Maturity stage -> cue words, checked in this order; the first stage that hits wins.
    maturity_cues = [
        ('market_ready', ['implemented', 'deployed', 'commercial', 'product']),
        ('prototype_stage', ['prototype', 'demonstration', 'testing']),
    ]

    for paper in papers:
        # '.' in the patterns below does not match a newline, so a sentence that
        # wraps across lines would never match. Collapse all whitespace first.
        text_lower = ' '.join(f"{paper['title']} {paper['abstract']}".split()).lower()

        for result_key, keywords in keyword_groups:
            for keyword in keywords:
                # Shortest run from the keyword up to (not including) '.', '!' or '?'.
                pattern = re.escape(keyword) + r".*?(?=[.!?])"
                analysis[result_key].extend(re.findall(pattern, text_lower))

        # Papers with no maturity cue at all default to the research stage.
        stage = next(
            (name for name, cues in maturity_cues
             if any(cue in text_lower for cue in cues)),
            'research_stage',
        )
        analysis['maturity_assessment'][stage].append(paper['title'])

    return analysis

@tool
def generate_business_report(papers: List[Dict[str, Any]], market_analysis: Dict[str, Any]) -> str:
    """
    Generates comprehensive business report

    Args:
        papers: List of research papers whose 'title', 'year' and 'abstract' entries are read
        market_analysis: Market potential analysis with the keys produced by analyze_market_potential

    Returns:
        Detailed business report in markdown format
    """
    def one_line(text):
        # Collapse line breaks and runs of spaces into single spaces.
        return ' '.join(text.split())

    def top_unique(items, limit=5):
        # First `limit` distinct items in first-seen order. Slicing list(set(...))
        # picked an arbitrary subset that changed from run to run.
        return list(dict.fromkeys(items))[:limit]

    maturity = market_analysis['maturity_assessment']
    parts = ["# Business Opportunity Analysis Report\n\n"]

    # Executive Summary
    parts.append("## Executive Summary\n")
    if papers:
        years = [paper['year'] for paper in papers]
        parts.append(f"Analysis based on {len(papers)} research publications from {min(years)} ")
        parts.append(f"to {max(years)}, focusing on market potential and business opportunities.\n\n")
    else:
        # min()/max() raise ValueError on an empty sequence, e.g. a search with no hits.
        parts.append("Analysis based on 0 research publications, "
                     "focusing on market potential and business opportunities.\n\n")

    # Market Overview
    parts.append("## Market Overview\n")

    # Recent Developments: newest year first. The sort is stable, so papers from
    # the same year keep their input order.
    parts.append("### Recent Developments\n")
    for paper in sorted(papers, key=lambda p: p['year'], reverse=True):
        parts.append(f"- ({paper['year']}) {paper['title']}\n")
        # '.' does not match a newline, so flatten the abstract before looking for
        # the first result-style sentence fragment.
        key_findings = re.findall(
            r'(?:result|show|demonstrate|achieve|improve).*?(?=[.!?])',
            one_line(paper['abstract']).lower(),
        )
        if key_findings:
            parts.append("  - Key Finding: " + key_findings[0].capitalize() + "\n")
    parts.append("\n")

    # Market Potential: each subsection is skipped when empty, but its trailing
    # blank line is always written.
    parts.append("## Market Potential\n")
    potential_sections = (
        ("Target Market Segments", 'market_segments'),
        ("Potential Business Models", 'business_models'),
        ("Market Drivers", 'market_drivers'),
    )
    for heading, key in potential_sections:
        if market_analysis[key]:
            parts.append(f"### {heading}\n")
            parts.extend(f"- {item.capitalize()}\n" for item in top_unique(market_analysis[key]))
        parts.append("\n")

    # Technology Readiness
    parts.append("## Technology Readiness Assessment\n")
    readiness_sections = (
        ("Market-Ready Solutions", 'market_ready'),
        ("In Development (Prototype Stage)", 'prototype_stage'),
        ("Early Research Stage", 'research_stage'),
    )
    for heading, key in readiness_sections:
        if maturity[key]:
            parts.append(f"\n### {heading}\n")
            parts.extend(f"- {tech}\n" for tech in maturity[key])

    # Risks and Challenges
    if market_analysis['market_barriers']:
        parts.append("\n## Risks and Challenges\n")
        parts.extend(
            f"- {barrier.capitalize()}\n"
            for barrier in top_unique(market_analysis['market_barriers'])
        )

    # Recommendations: driven by whether market-ready papers outnumber research-stage ones.
    parts.append("\n## Recommendations\n")
    parts.append("Based on the analysis:\n")
    if len(maturity['market_ready']) > len(maturity['research_stage']):
        parts.append("- Market is mature for commercial entry\n")
        parts.append("- Focus on differentiation and competitive advantages\n")
    else:
        parts.append("- Technology still in early stages\n")
        parts.append("- Consider R&D investments and strategic partnerships\n")

    return ''.join(parts)

def run_business_analysis(query: str, max_papers: int = 10, hf_token: str = None):
    """
    Main function to run the business analysis

    Builds a CodeAgent over the search, market-potential and report tools and asks
    it to produce a business report for the given topic.

    Args:
        query: Search query string
        max_papers: Maximum number of papers to analyze
        hf_token: Hugging Face API token

    Returns:
        Whatever the agent's run() call returns

    Raises:
        ValueError: If hf_token is missing or empty
    """
    if not hf_token:
        raise ValueError("Please provide a Hugging Face API token")

    model = HfApiModel(
        model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
        # Pass the caller's token through. A placeholder literal used to be sent
        # here, so the validated hf_token argument was never actually used.
        token=hf_token
    )

    agent = CodeAgent(
        tools=[search_papers, analyze_market_potential, generate_business_report],
        model=model,
        add_base_tools=True
    )

    result = agent.run(
        f"""Analyze the business potential of '{query}' by:
        1. Finding {max_papers} relevant research papers
        2. Analyzing market potential and business implications
        3. Generating a comprehensive business report
        Include specific market segments, business models, and recommendations."""
    )

    return result

# Example usage
if __name__ == "__main__":
    # Test individual components by calling the three tools directly, in pipeline
    # order, without building an agent (so no Hugging Face token is needed here).
    # Nothing is caught: an exception from any step propagates out of the cell.
    papers = search_papers("autonomous vehicles commercial applications", max_results=3)
    market_analysis = analyze_market_potential(papers)
    report = generate_business_report(papers, market_analysis)
    print("\nBusiness Report Preview:")
    print(report[:500] + "...")  # Print first 500 characters


Business Report Preview:
# Business Opportunity Analysis Report

## Executive Summary
Analysis based on 3 research publications from 2020 to 2023, focusing on market potential and business opportunities.

## Market Overview
### Recent Developments
- (2023) Smart Roads: Roadside Perception, Vehicle-Road Cooperation and Business Model
  - Key Finding: Resulting in no profitability for investors
- (2021) From market-ready ROVs to low-cost AUVs
- (2020) Road Quality Analysis Based on Cognitive Internet of Vehicles (CIoV)
  ...


In [None]:
# The token is read from the HF_TOKEN environment variable rather than being written
# into the notebook. If it is unset, run_business_analysis raises ValueError.
analysis = run_business_analysis(
    query="autonomous vehicles",
    max_papers=3,
    hf_token=os.environ.get("HF_TOKEN")
)
print(analysis)

# Business Opportunity Analysis Report

## Executive Summary
Analysis based on 3 research publications from 2020 to 2021, focusing on market potential and business opportunities.

## Market Overview
### Recent Developments
- (2021) Towards Fully Intelligent Transportation through Infrastructure-Vehicle Cooperative Autonomous Driving: Challenges and Opportunities
- (2020) A Survey and Insights on Deployments of the Connected and Autonomous Vehicles in US
- (2020) On Designing Computing Systems for Autonomous Vehicles: a PerceptIn Case Study

## Market Potential
### Target Market Segments
- Cloud and ict providers
- Automotive manufacturers
- Transportation and logistics companies

### Potential Business Models
- Data services through improved transportation infrastructure
- Hardware sales of autonomous vehicle components
- Subscription-based services for autonomous driving capabilities

### Market Drivers
- Safety improvements through autonomous vehicles
- Enhanced traffic efficiency
- 