In [None]:
# Install system dependencies for PDF generation
!apt-get update -qq
!apt-get install -y wkhtmltopdf

# Install all Python libraries
!pip install -qU crewai crewai_tools langchain-groq sentence-transformers langchain_huggingface \
langchain_openai litellm streamlit pyngrok markdown2 pdfkit matplotlib seaborn


In [None]:
import os
from getpass import getpass

import litellm


def _ensure_api_key(env_name: str) -> None:
    """Make sure ``env_name`` holds a non-empty value in ``os.environ``.

    A value that is already present in the environment is left untouched, so
    re-running this cell is harmless. Otherwise the user is prompted with
    hidden input, which keeps the secret out of the notebook source and output.

    Args:
        env_name: Name of the environment variable to populate.
    """
    if not os.environ.get(env_name):
        os.environ[env_name] = getpass(f"Enter {env_name}: ").strip()


# Setup your API keys: never paste them into the notebook itself.
for _key_name in ("GROQ_API_KEY", "TAVILY_API_KEY"):
    _ensure_api_key(_key_name)

# Configure LiteLLM to use Groq
# NOTE(review): this only sets an attribute on the litellm module; provider
# routing presumably comes from the "groq/" prefix in the model name used
# later on. Confirm against the LiteLLM docs whether this line has any effect.
litellm.provider = "groq"

In [None]:
from crewai import Agent, Task, Crew
from crewai.tools import tool
from langchain_openai import ChatOpenAI
from crewai_tools import SerperDevTool  # no longer used in this cell; import kept for other cells
from langchain_community.tools.tavily_search import TavilySearchResults

# Initialize LLM
# ChatOpenAI is pointed at Groq's OpenAI-compatible base URL and authenticated
# with the Groq key; temperature 0 keeps the output as repeatable as possible.
# NOTE(review): verify that this model id is still offered by Groq.
llm = ChatOpenAI(
    openai_api_base="https://api.groq.com/openai/v1",
    openai_api_key=os.environ['GROQ_API_KEY'],
    model_name="groq/llama3-8b-8192",
    temperature=0,
    max_tokens=1000,
)

# Web search tool
# `max_results` is the TavilySearchResults field that caps the number of hits
# (the previous `k=3` is not a field of that class). The tool reads
# TAVILY_API_KEY from the environment on its own.
web_search_tool = TavilySearchResults(max_results=3)
# Raises KeyError early if the Tavily key was never set. It must NOT be handed
# to SerperDevTool: Serper is a different service with its own key.
api_key = os.environ['TAVILY_API_KEY']


@tool("Tavily Web Search")
def tavily_search(query: str) -> str:
    """Search the web with Tavily and return the top results as text.

    Args:
        query: Free-text search query.

    Returns:
        String form of the result list produced by the Tavily LangChain tool.
    """
    return str(web_search_tool.invoke({"query": query}))


# Define agents
# Both search-enabled agents use the Tavily-backed tool, matching the only
# search API key this notebook configures.
researcher = Agent(
    role='Researcher and Specialist Web Scraper',
    goal='Search and scrape job platforms like LinkedIn, Bayt, Wuzzuf, and Glassdoor to collect data about the most in-demand AI/ML job roles in MENA.',
    backstory='Experienced web scraper focused on job market trends.',
    tools=[tavily_search],
    llm=llm
)

data_extraction_agent = Agent(
    role='Data Extractor',
    goal='Parse job postings to extract job titles, skills, locations, and company names.',
    backstory='Data analyst skilled in structured extraction.',
    tools=[tavily_search],
    llm=llm
)

def _toolless_agent(role, goal, backstory):
    """Build an Agent with an empty tool list that runs on the shared ``llm``."""
    return Agent(role=role, goal=goal, backstory=backstory, tools=[], llm=llm)


# The analysis and writing stages are configured without any tools.
trend_analysis_agent = _toolless_agent(
    'Trend Analyst',
    'Analyze job data to identify top roles, skills, and job locations.',
    'Market analyst specialized in labor trends.',
)

report_writer_agent = _toolless_agent(
    'Report Writer',
    'Compile a structured markdown report with job trends, roles, and visualizations.',
    'Technical writer and visual storyteller.',
)

# Define tasks
# One row per pipeline stage: (agent, description, expected_output).
_task_specs = [
    (
        researcher,
        'Scrape LinkedIn, Bayt, Wuzzuf, and Glassdoor for AI/ML jobs in MENA.',
        'Dataset of job titles, skills, locations, and company names.',
    ),
    (
        data_extraction_agent,
        'Extract structured job data from the scraped postings.',
        'Structured CSV/JSON with job_title, skills, location, company_name.',
    ),
    (
        trend_analysis_agent,
        'Analyze structured data to identify top roles, key skills, and country distribution.',
        'Insight report on AI/ML roles and skills demand by region.',
    ),
    (
        report_writer_agent,
        'Write markdown report titled "Top AI/ML Jobs in MENA – May 2025" with visualizations.',
        'Markdown report with roles, skills, country trends, charts.',
    ),
]

# Tasks are created in the same order as the rows above.
researcher_task, data_extraction_task, trend_analysis_task, report_writer_task = (
    Task(description=_description, expected_output=_expected, agent=_agent)
    for _agent, _description, _expected in _task_specs
)


In [None]:
from crewai import Crew
import markdown2
import pdfkit

# Assemble the four-stage pipeline; tasks are listed in the order
# research -> extraction -> analysis -> report.
crew = Crew(
    agents=[researcher, data_extraction_agent, trend_analysis_agent, report_writer_agent],
    tasks=[researcher_task, data_extraction_task, trend_analysis_task, report_writer_task],
    verbose=True
)

# Kickoff the crew run
result = crew.kickoff()

# Current CrewAI releases return a CrewOutput object rather than a plain
# string, so it cannot be sliced or written to a file directly. Use its raw
# text when available and fall back to str() for older string results.
report_markdown = getattr(result, "raw", None) or str(result)

# Preview result snippet
print("\n==== Preview: Top AI/ML Roles in MENA ====")
print(report_markdown[:1000])

# Save as markdown file
with open("Top_AI_ML_Jobs_MENA_May_2025.md", "w", encoding='utf-8') as f:
    f.write(report_markdown)

# Convert markdown to PDF
# The extras make pipe tables and fenced code blocks render as HTML instead of
# literal text; the explicit encoding keeps non-ASCII characters (such as the
# en dash in the report title) intact in the PDF.
html = markdown2.markdown(report_markdown, extras=["tables", "fenced-code-blocks"])
pdfkit.from_string(
    html,
    "Top_AI_ML_Jobs_MENA_May_2025.pdf",
    options={"encoding": "UTF-8"},
)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

def sample_visualization(
    roles=('ML Engineer', 'Data Scientist', 'AI Researcher'),
    counts=(50, 40, 30),
    output_path='roles_distribution.png',
):
    """Save a bar chart of posting counts per job role as an image file.

    Called without arguments it renders the built-in sample data and writes
    'roles_distribution.png', exactly as before.

    Args:
        roles: Job role labels, one per bar.
        counts: Number of postings for each role, aligned with ``roles``.
        output_path: File path the figure is saved to.

    Raises:
        ValueError: If ``roles`` and ``counts`` differ in length.
    """
    roles, counts = list(roles), list(counts)
    if len(roles) != len(counts):
        raise ValueError(
            f"roles and counts must have the same length "
            f"(got {len(roles)} and {len(counts)})"
        )

    # Explicit figure/axes instead of the implicit pyplot state machine.
    fig, ax = plt.subplots(figsize=(8, 5))
    # Passing `palette` without `hue` is deprecated in seaborn >= 0.13; mapping
    # hue to the x variable keeps the per-bar colours, and legend=False hides
    # the redundant legend.
    sns.barplot(x=roles, y=counts, hue=roles, palette='viridis', legend=False, ax=ax)
    ax.set_title('Top AI/ML Roles in MENA')
    ax.set_xlabel('Job Role')
    ax.set_ylabel('Number of Postings')
    fig.tight_layout()
    fig.savefig(output_path)
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

sample_visualization()
