In [None]:
"""
# 🎯 Notebook 04 : Génération d'Offres d'Emploi

**Objectif** : Créer un dataset d'offres d'emploi pour matcher avec le CV

**Approche** : Dataset fictif mais réaliste
- 25 offres réparties en 5 catégories
- Stacks techniques variées
- Différents niveaux (Junior, Mid, Senior)

**Catégories** :
1. ML Engineer (5 offres) - Match attendu avec votre CV : 60-70%
2. Data Scientist (5 offres) - Match attendu : 45-55%
3. Python Developer (5 offres) - Match attendu : 50-60%
4. DevOps Engineer (5 offres) - Match attendu : 40-50%
5. Frontend Developer (5 offres) - Match attendu : 15-25%
"""

In [1]:
# Imports
import sys
import json
from pathlib import Path
from datetime import datetime, timedelta
import random
from typing import List, Dict

# Make the project root importable: the notebook is expected to run from the
# notebooks/ folder, so the root is the parent of the current working directory.
notebooks_dir = Path().absolute()
project_root = notebooks_dir.parent
sys.path.insert(0, str(project_root))

# Echo the resolved locations so a wrong working directory is spotted immediately.
print(f"üìÅ Projet : {project_root}")
print(f"üìÇ Notebooks : {notebooks_dir}")
print("‚úÖ Imports termin√©s")

üìÅ Projet : c:\Users\rober\OneDrive\Bureau\PFE
üìÇ Notebooks : c:\Users\rober\OneDrive\Bureau\PFE\notebooks
‚úÖ Imports termin√©s


In [2]:
# Templates d'offres d'emploi par catégorie

JOB_TEMPLATES = {
    "ml_engineer": [
        {
            "title": "Junior ML Engineer",
            "company": "AI Startup Paris",
            "type": "CDI",
            "experience": "0-2 ans",
            "salary": "35-45K‚Ç¨",
            "description": """
Nous recherchons un Junior ML Engineer passionn√© pour rejoindre notre √©quipe R&D.

Responsabilit√©s :
- D√©velopper des mod√®les de Machine Learning (classification, r√©gression)
- Entra√Æner et optimiser des r√©seaux de neurones avec PyTorch
- D√©ployer des mod√®les en production avec Docker
- Participer aux revues de code et √† l'am√©lioration continue

Stack technique :
- Python, PyTorch, scikit-learn
- Docker, Git
- FastAPI pour les APIs
- PostgreSQL
            """,
            "requirements": [
                "Python (numpy, pandas, scikit-learn)",
                "Machine Learning basics (supervised learning)",
                "Git et GitHub",
                "Docker (notions de base)",
                "Anglais technique (lecture documentation)"
            ],
            "nice_to_have": [
                "PyTorch ou TensorFlow",
                "FastAPI ou Flask",
                "MLflow",
                "AWS ou GCP"
            ]
        },
        {
            "title": "ML Engineer - Computer Vision",
            "company": "VisionTech",
            "type": "CDI",
            "experience": "1-3 ans",
            "salary": "40-50K‚Ç¨",
            "description": """
Rejoignez notre √©quipe Computer Vision pour d√©velopper des solutions d'analyse d'images.

Missions :
- D√©velopper des mod√®les de Computer Vision (d√©tection d'objets, segmentation)
- Fine-tuner des mod√®les pr√©-entra√Æn√©s (YOLO, ResNet)
- Optimiser les performances des mod√®les pour la production
- Collaborer avec l'√©quipe produit

Technologies :
- Python, PyTorch, OpenCV
- CNNs, Transfer Learning
- Docker, Kubernetes
- MLOps (MLflow, DVC)
            """,
            "requirements": [
                "Deep Learning (CNNs, architectures modernes)",
                "PyTorch ou TensorFlow",
                "Computer Vision (OpenCV, PIL)",
                "Python avanc√©",
                "Git, Docker"
            ],
            "nice_to_have": [
                "YOLO, Faster R-CNN",
                "Kubernetes",
                "CI/CD",
                "Cloud (AWS, Azure)"
            ]
        },
        {
            "title": "NLP Engineer",
            "company": "TextAI",
            "type": "CDI",
            "experience": "1-3 ans",
            "salary": "42-52K‚Ç¨",
            "description": """
Nous cherchons un NLP Engineer pour d√©velopper des solutions de traitement du langage naturel.

Projets :
- D√©velopper des mod√®les de classification de texte
- Fine-tuner des LLMs (BERT, GPT)
- Cr√©er des syst√®mes de question-answering
- Impl√©menter des pipelines NLP en production

Stack :
- Python, HuggingFace Transformers
- spaCy, NLTK
- PyTorch
- FastAPI, Docker
- Elasticsearch
            """,
            "requirements": [
                "NLP (tokenization, embeddings, transformers)",
                "HuggingFace Transformers",
                "PyTorch",
                "Python (pandas, numpy)",
                "APIs RESTful"
            ],
            "nice_to_have": [
                "Fine-tuning LLMs",
                "spaCy",
                "Vector databases (Pinecone, Weaviate)",
                "Kubernetes"
            ]
        },
        {
            "title": "MLOps Engineer",
            "company": "DataCorp",
            "type": "CDI",
            "experience": "2-4 ans",
            "salary": "45-55K‚Ç¨",
            "description": """
Rejoignez notre √©quipe MLOps pour industrialiser le d√©ploiement de mod√®les ML.

Responsabilit√©s :
- Cr√©er des pipelines CI/CD pour mod√®les ML
- D√©ployer des mod√®les en production (Docker, Kubernetes)
- Monitorer les performances des mod√®les
- Optimiser les temps d'inf√©rence

Technologies :
- Python, Docker, Kubernetes
- MLflow, Kubeflow
- GitLab CI/CD
- Prometheus, Grafana
- AWS/GCP
            """,
            "requirements": [
                "MLOps (CI/CD pour ML)",
                "Docker, Kubernetes",
                "Python",
                "Cloud (AWS ou GCP)",
                "Git, GitLab/GitHub Actions"
            ],
            "nice_to_have": [
                "MLflow, Kubeflow",
                "Terraform",
                "Monitoring (Prometheus)",
                "FastAPI"
            ]
        },
        {
            "title": "Senior ML Engineer",
            "company": "BigTech France",
            "type": "CDI",
            "experience": "3-5 ans",
            "salary": "55-70K‚Ç¨",
            "description": """
Nous recherchons un Senior ML Engineer pour mener des projets ML strat√©giques.

Missions :
- Architecturer des solutions ML scalables
- Encadrer des juniors
- Participer aux d√©cisions techniques
- Optimiser les mod√®les pour la production

Stack technique :
- Python, PyTorch/TensorFlow
- Kubernetes, Helm
- AWS (SageMaker, Lambda, S3)
- Spark pour Big Data
- MLflow, DVC
            """,
            "requirements": [
                "5+ ans en Machine Learning",
                "Deep Learning avanc√©",
                "MLOps et architecture",
                "Kubernetes en production",
                "Leadership technique"
            ],
            "nice_to_have": [
                "Publications scientifiques",
                "Contributions open-source",
                "Spark, Big Data",
                "Plusieurs projets ML en production"
            ]
        }
    ],
    
    "data_scientist": [
        {
            "title": "Data Scientist Junior",
            "company": "Analytics Pro",
            "type": "CDI",
            "experience": "0-2 ans",
            "salary": "35-45K‚Ç¨",
            "description": """
Rejoignez notre √©quipe Data Science pour analyser des donn√©es et cr√©er des mod√®les pr√©dictifs.

Missions :
- Analyser des datasets et cr√©er des visualisations
- D√©velopper des mod√®les pr√©dictifs (r√©gression, classification)
- Collaborer avec les √©quipes m√©tier
- Pr√©senter les r√©sultats aux stakeholders

Outils :
- Python (pandas, numpy, matplotlib)
- Jupyter Notebooks
- scikit-learn
- SQL
- Git
            """,
            "requirements": [
                "Python (pandas, numpy, matplotlib)",
                "Machine Learning (scikit-learn)",
                "SQL",
                "Statistiques",
                "Communication (pr√©sentation de r√©sultats)"
            ],
            "nice_to_have": [
                "PowerBI ou Tableau",
                "Feature engineering",
                "Time series analysis",
                "A/B testing"
            ]
        },
        {
            "title": "Data Scientist - Marketing",
            "company": "MarketingTech",
            "type": "CDI",
            "experience": "1-3 ans",
            "salary": "40-50K‚Ç¨",
            "description": """
Nous cherchons un Data Scientist pour optimiser nos campagnes marketing.

Projets :
- Mod√®les de pr√©diction de churn
- Segmentation clients (clustering)
- Analyse de l'attribution marketing
- Recommandation de produits

Stack :
- Python, R
- scikit-learn, XGBoost
- SQL (PostgreSQL)
- Tableau/PowerBI
- Git
            """,
            "requirements": [
                "Python et/ou R",
                "Machine Learning (classification, clustering)",
                "SQL avanc√©",
                "Statistiques et A/B testing",
                "Visualisation de donn√©es"
            ],
            "nice_to_have": [
                "Marketing analytics",
                "Google Analytics",
                "Syst√®me de recommandation",
                "Big Data (Spark)"
            ]
        },
        {
            "title": "Data Scientist - Finance",
            "company": "FinTech Solutions",
            "type": "CDI",
            "experience": "2-4 ans",
            "salary": "45-60K‚Ç¨",
            "description": """
Rejoignez notre √©quipe Data Science pour d√©velopper des mod√®les de risque financier.

Responsabilit√©s :
- Mod√®les de scoring de cr√©dit
- D√©tection de fraude
- Pr√©diction de s√©ries temporelles
- Reporting r√©glementaire

Technologies :
- Python, scikit-learn, XGBoost
- Time series (ARIMA, Prophet)
- SQL, PySpark
- Git, Docker
            """,
            "requirements": [
                "Machine Learning (classification, regression)",
                "Time series analysis",
                "Python (pandas, scikit-learn)",
                "SQL",
                "Statistiques avanc√©es"
            ],
            "nice_to_have": [
                "Finance/Risk management",
                "PySpark",
                "R√©glementation RGPD",
                "Explainability (SHAP)"
            ]
        },
        {
            "title": "Lead Data Scientist",
            "company": "DataLab",
            "type": "CDI",
            "experience": "3-5 ans",
            "salary": "55-70K‚Ç¨",
            "description": """
Nous recherchons un Lead Data Scientist pour encadrer une √©quipe de 3-5 personnes.

Missions :
- D√©finir la strat√©gie data science
- Encadrer et mentorer l'√©quipe
- Architecturer les solutions ML
- Interface avec les √©quipes m√©tier

Stack :
- Python, R
- scikit-learn, XGBoost, LightGBM
- MLflow, DVC
- Cloud (AWS/GCP)
- Airflow pour orchestration
            """,
            "requirements": [
                "5+ ans en Data Science",
                "Leadership et encadrement",
                "ML avanc√© (ensembles, deep learning)",
                "MLOps",
                "Communication avec stakeholders"
            ],
            "nice_to_have": [
                "PhD en ML/Stats",
                "Publications",
                "Big Data (Spark)",
                "Architecture cloud"
            ]
        },
        {
            "title": "Data Scientist - HealthTech",
            "company": "MedAI",
            "type": "CDI",
            "experience": "2-4 ans",
            "salary": "48-58K‚Ç¨",
            "description": """
Rejoignez notre mission d'am√©liorer la sant√© par l'IA.

Projets :
- Analyse d'images m√©dicales
- Pr√©diction de pathologies
- Optimisation de parcours patients
- Conformit√© RGPD et donn√©es de sant√©

Technologies :
- Python, PyTorch
- Computer Vision (medical imaging)
- scikit-learn
- PostgreSQL
- Docker
            """,
            "requirements": [
                "Machine Learning et/ou Deep Learning",
                "Python avanc√©",
                "Statistiques",
                "√âthique et RGPD",
                "Communication avec m√©decins"
            ],
            "nice_to_have": [
                "Background m√©dical/biologie",
                "Computer Vision",
                "Clinical trials analysis",
                "Explainable AI"
            ]
        }
    ],
    
    "python_developer": [
        {
            "title": "Python Developer Junior",
            "company": "WebDev Studio",
            "type": "CDI",
            "experience": "0-2 ans",
            "salary": "32-42K‚Ç¨",
            "description": """
Nous recherchons un d√©veloppeur Python junior pour rejoindre notre √©quipe backend.

Missions :
- D√©velopper des APIs RESTful avec FastAPI
- Int√©grer des bases de donn√©es (PostgreSQL)
- √âcrire des tests unitaires
- Participer aux code reviews

Stack :
- Python 3.10+
- FastAPI
- PostgreSQL
- Docker
- Git
            """,
            "requirements": [
                "Python (bonnes pratiques)",
                "APIs RESTful",
                "SQL",
                "Git",
                "Tests unitaires (pytest)"
            ],
            "nice_to_have": [
                "FastAPI",
                "Docker",
                "CI/CD",
                "Redis"
            ]
        },
        {
            "title": "Backend Developer Python",
            "company": "SaaS Company",
            "type": "CDI",
            "experience": "1-3 ans",
            "salary": "38-48K‚Ç¨",
            "description": """
Rejoignez notre √©quipe pour d√©velopper notre plateforme SaaS.

Responsabilit√©s :
- D√©velopper des microservices Python
- Optimiser les performances
- Int√©grer des APIs tierces
- Maintenir la qualit√© du code

Technologies :
- Python, FastAPI/Flask
- PostgreSQL, Redis
- RabbitMQ ou Kafka
- Docker, Kubernetes
- AWS
            """,
            "requirements": [
                "Python avanc√©",
                "FastAPI ou Flask",
                "Bases de donn√©es (SQL et NoSQL)",
                "Docker",
                "APIs RESTful et GraphQL"
            ],
            "nice_to_have": [
                "Microservices",
                "Message queues (RabbitMQ)",
                "Kubernetes",
                "AWS/GCP"
            ]
        },
        {
            "title": "Python Developer - Data Engineering",
            "company": "BigData Corp",
            "type": "CDI",
            "experience": "2-4 ans",
            "salary": "42-55K‚Ç¨",
            "description": """
Nous cherchons un d√©veloppeur Python pour construire des pipelines de donn√©es.

Projets :
- D√©velopper des ETL en Python
- Traiter des volumes de donn√©es importants
- Optimiser les performances des pipelines
- D√©ployer sur le cloud

Stack :
- Python (pandas, polars)
- Airflow
- Spark (PySpark)
- PostgreSQL, MongoDB
- Docker, Kubernetes
- AWS (S3, Lambda, Glue)
            """,
            "requirements": [
                "Python (pandas, numpy)",
                "ETL et data pipelines",
                "SQL avanc√©",
                "Airflow",
                "Cloud (AWS ou GCP)"
            ],
            "nice_to_have": [
                "PySpark",
                "Streaming (Kafka)",
                "Data modeling",
                "DBT"
            ]
        },
        {
            "title": "Full Stack Developer Python/React",
            "company": "TechStartup",
            "type": "CDI",
            "experience": "2-4 ans",
            "salary": "45-55K‚Ç¨",
            "description": """
Rejoignez notre startup pour d√©velopper notre produit full-stack.

Responsabilit√©s :
- Backend Python (FastAPI)
- Frontend React
- Bases de donn√©es
- D√©ploiement cloud

Stack :
- Python, FastAPI
- React, TypeScript
- PostgreSQL
- Docker, AWS
- Git, CI/CD
            """,
            "requirements": [
                "Python (FastAPI ou Flask)",
                "React et TypeScript",
                "SQL",
                "RESTful APIs",
                "Git"
            ],
            "nice_to_have": [
                "Next.js",
                "GraphQL",
                "Docker/Kubernetes",
                "AWS"
            ]
        },
        {
            "title": "Senior Python Developer",
            "company": "Enterprise Tech",
            "type": "CDI",
            "experience": "4-6 ans",
            "salary": "55-70K‚Ç¨",
            "description": """
Nous recherchons un Senior Python Developer pour mener des projets strat√©giques.

Missions :
- Architecturer des applications Python scalables
- Mentorer des d√©veloppeurs juniors
- Optimiser les performances
- D√©finir les standards de code

Technologies :
- Python 3.11+
- FastAPI, Django
- PostgreSQL, MongoDB, Redis
- Microservices
- Kubernetes, Helm
- CI/CD avanc√©
            """,
            "requirements": [
                "6+ ans en Python",
                "Architecture logicielle",
                "Microservices",
                "Leadership technique",
                "Performance optimization"
            ],
            "nice_to_have": [
                "Contributions open-source",
                "Conf√©rences/talks",
                "Multiple frameworks",
                "Security best practices"
            ]
        }
    ],
    
    "devops_engineer": [
        {
            "title": "DevOps Engineer Junior",
            "company": "CloudOps",
            "type": "CDI",
            "experience": "0-2 ans",
            "salary": "35-45K‚Ç¨",
            "description": """
Rejoignez notre √©quipe DevOps pour automatiser le d√©ploiement d'applications.

Missions :
- Cr√©er des pipelines CI/CD
- D√©ployer des applications avec Docker
- G√©rer l'infrastructure cloud (AWS)
- Monitorer les applications

Outils :
- Docker, Docker Compose
- GitLab CI/CD ou GitHub Actions
- AWS (EC2, S3, RDS)
- Linux
- Git
            """,
            "requirements": [
                "Docker et containerisation",
                "CI/CD (GitLab ou GitHub Actions)",
                "Linux (bash scripting)",
                "Git",
                "Cloud basics (AWS ou Azure)"
            ],
            "nice_to_have": [
                "Kubernetes",
                "Terraform",
                "Ansible",
                "Python scripting"
            ]
        },
        {
            "title": "DevOps Engineer - Kubernetes",
            "company": "ContainerTech",
            "type": "CDI",
            "experience": "2-4 ans",
            "salary": "45-55K‚Ç¨",
            "description": """
Nous cherchons un DevOps sp√©cialis√© Kubernetes pour g√©rer notre infrastructure.

Responsabilit√©s :
- G√©rer des clusters Kubernetes
- D√©ployer des applications (Helm charts)
- Monitorer et optimiser les ressources
- Automatiser avec Terraform

Stack :
- Kubernetes, Helm
- Docker
- Terraform, Ansible
- AWS EKS ou GCP GKE
- Prometheus, Grafana
- ArgoCD
            """,
            "requirements": [
                "Kubernetes (d√©ploiements, services, ingress)",
                "Docker avanc√©",
                "Terraform ou Ansible",
                "Cloud (AWS, GCP ou Azure)",
                "Monitoring (Prometheus)"
            ],
            "nice_to_have": [
                "Helm charts",
                "GitOps (ArgoCD, Flux)",
                "Service mesh (Istio)",
                "CKA certification"
            ]
        },
        {
            "title": "SRE Engineer",
            "company": "ScaleOps",
            "type": "CDI",
            "experience": "3-5 ans",
            "salary": "50-65K‚Ç¨",
            "description": """
Rejoignez notre √©quipe SRE pour garantir la fiabilit√© de nos services.

Missions :
- D√©finir et suivre les SLOs
- Automatiser les t√¢ches op√©rationnelles
- R√©pondre aux incidents
- Optimiser les performances

Technologies :
- Kubernetes, Docker
- Terraform, Ansible
- Python, Go
- Prometheus, Grafana, ELK
- Cloud (multi-cloud)
            """,
            "requirements": [
                "SRE ou DevOps exp√©riment√©",
                "Kubernetes en production",
                "Scripting (Python ou Go)",
                "Monitoring avanc√©",
                "Incident management"
            ],
            "nice_to_have": [
                "On-call experience",
                "Chaos engineering",
                "Multi-cloud",
                "Security (DevSecOps)"
            ]
        },
        {
            "title": "Platform Engineer",
            "company": "InfraTech",
            "type": "CDI",
            "experience": "3-5 ans",
            "salary": "52-68K‚Ç¨",
            "description": """
Nous recherchons un Platform Engineer pour construire notre plateforme interne.

Projets :
- Cr√©er une plateforme self-service pour les devs
- Automatiser les d√©ploiements
- G√©rer l'infrastructure as code
- Am√©liorer la developer experience

Stack :
- Kubernetes, Helm
- Terraform, Pulumi
- GitLab CI/CD
- ArgoCD
- Internal Developer Platform (Backstage)
            """,
            "requirements": [
                "Kubernetes avanc√©",
                "Infrastructure as Code (Terraform)",
                "CI/CD",
                "Developer experience",
                "Scripting (Python, Bash, Go)"
            ],
            "nice_to_have": [
                "Backstage ou similaire",
                "API design",
                "Multi-tenancy",
                "FinOps"
            ]
        },
        {
            "title": "Lead DevOps Engineer",
            "company": "BigCorp",
            "type": "CDI",
            "experience": "5-7 ans",
            "salary": "60-80K‚Ç¨",
            "description": """
Nous recherchons un Lead DevOps pour diriger notre transformation cloud.

Responsabilit√©s :
- D√©finir la strat√©gie DevOps
- Encadrer une √©quipe de 5+ engineers
- Architecturer l'infrastructure
- Budget et optimisation des co√ªts cloud

Technologies :
- Multi-cloud (AWS, GCP, Azure)
- Kubernetes, Helm, ArgoCD
- Terraform
- Security et compliance
- FinOps
            """,
            "requirements": [
                "7+ ans en DevOps/SRE",
                "Leadership d'√©quipe",
                "Architecture cloud",
                "Security et compliance",
                "Budget management"
            ],
            "nice_to_have": [
                "Certifications cloud",
                "Talks/conf√©rences",
                "Open-source contributions",
                "FinOps certified"
            ]
        }
    ],
    
    "frontend_developer": [
        {
            "title": "Frontend Developer Junior",
            "company": "WebAgency",
            "type": "CDI",
            "experience": "0-2 ans",
            "salary": "30-40K‚Ç¨",
            "description": """
Nous recherchons un d√©veloppeur frontend junior pour cr√©er des interfaces web modernes.

Missions :
- D√©velopper des interfaces React
- Int√©grer des maquettes Figma
- Optimiser les performances
- Collaborer avec les designers

Stack :
- React, JavaScript
- HTML5, CSS3, SASS
- Git
- Responsive design
            """,
            "requirements": [
                "React basics",
                "JavaScript (ES6+)",
                "HTML5, CSS3",
                "Git",
                "Responsive design"
            ],
            "nice_to_have": [
                "TypeScript",
                "Next.js",
                "Tailwind CSS",
                "Testing (Jest)"
            ]
        },
        {
            "title": "React Developer",
            "company": "SaaS Startup",
            "type": "CDI",
            "experience": "1-3 ans",
            "salary": "38-48K‚Ç¨",
            "description": """
Rejoignez notre √©quipe pour d√©velopper notre application SaaS en React.

Responsabilit√©s :
- D√©velopper des composants React r√©utilisables
- G√©rer l'√©tat avec Redux/Zustand
- Int√©grer des APIs RESTful
- Optimiser les performances

Technologies :
- React, TypeScript
- Redux ou Zustand
- React Query
- Tailwind CSS
- Jest, React Testing Library
            """,
            "requirements": [
                "React avanc√©",
                "TypeScript",
                "State management (Redux/Zustand)",
                "APIs RESTful",
                "Tests unitaires"
            ],
            "nice_to_have": [
                "Next.js",
                "GraphQL",
                "Webpack/Vite",
                "CI/CD"
            ]
        },
        {
            "title": "Frontend Developer - Vue.js",
            "company": "TechCorp",
            "type": "CDI",
            "experience": "2-4 ans",
            "salary": "40-50K‚Ç¨",
            "description": """
Nous cherchons un d√©veloppeur Vue.js pour notre plateforme B2B.

Projets :
- D√©velopper des interfaces Vue.js 3
- Cr√©er des composants Vuetify
- Int√©grer des APIs GraphQL
- Optimiser les bundles

Stack :
- Vue.js 3, Composition API
- Vuetify ou Quasar
- Pinia
- GraphQL (Apollo Client)
- Vite
            """,
            "requirements": [
                "Vue.js 3 (Composition API)",
                "JavaScript/TypeScript",
                "State management (Pinia)",
                "APIs (REST ou GraphQL)",
                "Build tools (Vite, Webpack)"
            ],
            "nice_to_have": [
                "Nuxt.js",
                "Testing (Vitest)",
                "UI libraries (Vuetify)",
                "Docker"
            ]
        },
        {
            "title": "Full Stack JavaScript Developer",
            "company": "Digital Agency",
            "type": "CDI",
            "experience": "2-4 ans",
            "salary": "42-55K‚Ç¨",
            "description": """
Rejoignez notre agence pour d√©velopper des projets web full-stack.

Responsabilit√©s :
- Frontend React/Next.js
- Backend Node.js/Express
- Bases de donn√©es MongoDB
- D√©ploiement Vercel/Netlify

Technologies :
- React, Next.js
- Node.js, Express
- MongoDB, Prisma
- TypeScript
- Vercel, AWS
            """,
            "requirements": [
                "React et Next.js",
                "Node.js et Express",
                "MongoDB ou PostgreSQL",
                "TypeScript",
                "APIs RESTful"
            ],
            "nice_to_have": [
                "GraphQL",
                "Prisma ORM",
                "Serverless",
                "Docker"
            ]
        },
        {
            "title": "Lead Frontend Developer",
            "company": "Enterprise UI",
            "type": "CDI",
            "experience": "4-6 ans",
            "salary": "55-70K‚Ç¨",
            "description": """
Nous recherchons un Lead Frontend pour encadrer notre √©quipe UI.

Missions :
- Architecturer les applications frontend
- D√©finir les standards de code
- Mentorer l'√©quipe (3-5 devs)
- Optimiser les performances

Stack :
- React, TypeScript
- Micro-frontends
- Monorepo (Nx, Turborepo)
- Testing avanc√©
- CI/CD
            """,
            "requirements": [
                "6+ ans en frontend",
                "Architecture frontend",
                "Leadership technique",
                "Performance optimization",
                "Micro-frontends"
            ],
            "nice_to_have": [
                "Monorepo expertise",
                "Web Vitals expert",
                "Accessibility (WCAG)",
                "Design systems"
            ]
        }
    ]
}

# Sanity check: report how many templates each category holds, then the total.
# Display labels are listed in the same order as the categories in JOB_TEMPLATES.
category_labels = {
    "ml_engineer": "ML Engineer",
    "data_scientist": "Data Scientist",
    "python_developer": "Python Developer",
    "devops_engineer": "DevOps Engineer",
    "frontend_developer": "Frontend Developer",
}

print("‚úÖ Templates charg√©s")
for category_key, category_label in category_labels.items():
    print(f"   ‚Ä¢ {category_label} : {len(JOB_TEMPLATES[category_key])} offres")

total_templates = sum(map(len, JOB_TEMPLATES.values()))
print(f"\nüìä TOTAL : {total_templates} offres")

‚úÖ Templates charg√©s
   ‚Ä¢ ML Engineer : 5 offres
   ‚Ä¢ Data Scientist : 5 offres
   ‚Ä¢ Python Developer : 5 offres
   ‚Ä¢ DevOps Engineer : 5 offres
   ‚Ä¢ Frontend Developer : 5 offres

üìä TOTAL : 25 offres


In [3]:
def generate_job_posting(template: Dict, job_id: int, category: str) -> Dict:
    """
    Build a complete job posting from a template.

    The template fields are copied into the posting and complemented with
    randomised metadata: a publication date between 1 and 30 days in the
    past, a location, a remote flag and a number of applicants.

    Args:
        template: Job template. Must provide the keys "title", "company",
            "type", "experience", "salary", "description", "requirements"
            and "nice_to_have".
        job_id: Unique numeric identifier of the posting.
        category: Category key (ml_engineer, data_scientist, etc.).

    Returns:
        Dict describing the complete posting.

    Raises:
        KeyError: If the template lacks one of the expected keys.
    """
    # Random publication date, between 1 and 30 days ago
    days_ago = random.randint(1, 30)
    posted_date = (datetime.now() - timedelta(days=days_ago)).strftime("%Y-%m-%d")

    # Candidate locations
    locations = [
        "Paris, France",
        "Lyon, France",
        "Toulouse, France",
        "Remote France",
        "Paris (Hybrid)",
        "Bordeaux, France",
        "Nantes, France"
    ]

    # Draw the location exactly once and derive the remote flag from that
    # same draw, so a posting located at "Remote France" is always flagged
    # remote. Other locations remain remote-friendly about 30% of the time.
    location = random.choice(locations)
    remote_ok = "Remote" in location or random.random() > 0.7

    job_posting = {
        "job_id": f"job_{job_id:03d}",
        "category": category,
        "title": template["title"],
        "company": template["company"],
        "location": location,
        "type": template["type"],
        "experience": template["experience"],
        "salary": template["salary"],
        "description": template["description"].strip(),
        # Copy the lists so that editing a posting never alters the template
        "requirements": list(template["requirements"]),
        "nice_to_have": list(template["nice_to_have"]),
        "posted_date": posted_date,
        "url": f"https://example-jobs.com/{job_id}",
        "remote_ok": remote_ok,
        "applicants": random.randint(5, 150)
    }

    return job_posting


# Smoke test: build one posting from the first ML Engineer template
# and display its main fields.
print("\nüß™ TEST : G√©n√©ration d'une offre")
print("=" * 60)

test_job = generate_job_posting(JOB_TEMPLATES["ml_engineer"][0], job_id=1, category="ml_engineer")

print("‚úÖ Offre g√©n√©r√©e :")
for label, value in (
    ("ID", test_job['job_id']),
    ("Titre", test_job['title']),
    ("Entreprise", test_job['company']),
    ("Localisation", test_job['location']),
    ("Exp√©rience", test_job['experience']),
    ("Salaire", test_job['salary']),
    ("Remote", 'Oui' if test_job['remote_ok'] else 'Non'),
    ("Post√©e le", test_job['posted_date']),
    ("Candidatures", test_job['applicants']),
):
    print(f"   ‚Ä¢ {label} : {value}")


üß™ TEST : G√©n√©ration d'une offre
‚úÖ Offre g√©n√©r√©e :
   ‚Ä¢ ID : job_001
   ‚Ä¢ Titre : Junior ML Engineer
   ‚Ä¢ Entreprise : AI Startup Paris
   ‚Ä¢ Localisation : Remote France
   ‚Ä¢ Exp√©rience : 0-2 ans
   ‚Ä¢ Salaire : 35-45K‚Ç¨
   ‚Ä¢ Remote : Oui
   ‚Ä¢ Post√©e le : 2025-12-26
   ‚Ä¢ Candidatures : 80


In [4]:
# Generate every posting of the dataset.
# Seed the global RNG first so that Restart & Run All reproduces the same
# locations, remote flags and applicant counts. posted_date still depends on
# the day the notebook is run.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

print("\nüîß G√âN√âRATION DU DATASET COMPLET")
print("=" * 60)

all_jobs = []
category_counts = {}  # category key -> number of generated postings
job_id = 1

for category, templates in JOB_TEMPLATES.items():
    print(f"\nüìÇ Cat√©gorie : {category}")

    for template in templates:
        job = generate_job_posting(template, job_id, category)
        all_jobs.append(job)
        # Tally while generating instead of rescanning all_jobs per category
        category_counts[job['category']] = category_counts.get(job['category'], 0) + 1

        print(f"   {job_id}. {job['title']} - {job['company']}")

        job_id += 1

print(f"\n‚úÖ Dataset g√©n√©r√© : {len(all_jobs)} offres")
for label, key in (
    ("ML Engineer", "ml_engineer"),
    ("Data Scientist", "data_scientist"),
    ("Python Developer", "python_developer"),
    ("DevOps Engineer", "devops_engineer"),
    ("Frontend Developer", "frontend_developer"),
):
    # A category without any posting is reported as 0
    print(f"   ‚Ä¢ {label} : {category_counts.get(key, 0)}")


üîß G√âN√âRATION DU DATASET COMPLET

üìÇ Cat√©gorie : ml_engineer
   1. Junior ML Engineer - AI Startup Paris
   2. ML Engineer - Computer Vision - VisionTech
   3. NLP Engineer - TextAI
   4. MLOps Engineer - DataCorp
   5. Senior ML Engineer - BigTech France

üìÇ Cat√©gorie : data_scientist
   6. Data Scientist Junior - Analytics Pro
   7. Data Scientist - Marketing - MarketingTech
   8. Data Scientist - Finance - FinTech Solutions
   9. Lead Data Scientist - DataLab
   10. Data Scientist - HealthTech - MedAI

üìÇ Cat√©gorie : python_developer
   11. Python Developer Junior - WebDev Studio
   12. Backend Developer Python - SaaS Company
   13. Python Developer - Data Engineering - BigData Corp
   14. Full Stack Developer Python/React - TechStartup
   15. Senior Python Developer - Enterprise Tech

üìÇ Cat√©gorie : devops_engineer
   16. DevOps Engineer Junior - CloudOps
   17. DevOps Engineer - Kubernetes - ContainerTech
   18. SRE Engineer - ScaleOps
   19. Platform Engineer - I

In [5]:
# Dataset analysis: global figures, then breakdowns by experience and category,
# then a short sample of postings.
print("\nüìä ANALYSE DU DATASET")
print("=" * 60)

# Global statistics
print("\n1Ô∏è‚É£ STATISTIQUES G√âN√âRALES")
print(f"   ‚Ä¢ Nombre total d'offres : {len(all_jobs)}")
remote_count = sum(1 for posting in all_jobs if posting['remote_ok'])
remote_share = remote_count / len(all_jobs) * 100
print(f"   ‚Ä¢ Offres remote : {remote_count} ({remote_share:.1f}%)")

# Number of postings per experience range
exp_levels = {}
for posting in all_jobs:
    exp_levels.setdefault(posting['experience'], 0)
    exp_levels[posting['experience']] += 1

print("\n2Ô∏è‚É£ R√âPARTITION PAR EXP√âRIENCE")
for level in sorted(exp_levels):
    print(f"   ‚Ä¢ {level:15s} : {exp_levels[level]:2d} offres")

# Number of postings per category, with a human-readable category label
print("\n3Ô∏è‚É£ R√âPARTITION PAR CAT√âGORIE")
categories = {}
for posting in all_jobs:
    pretty_name = posting['category'].replace('_', ' ').title()
    categories.setdefault(pretty_name, 0)
    categories[pretty_name] += 1

# Largest categories first; ties keep their insertion order
ranked_categories = sorted(categories.items(), key=lambda pair: pair[1], reverse=True)
for pretty_name, total in ranked_categories:
    print(f"   ‚Ä¢ {pretty_name:20s} : {'‚ñà' * total} {total}")

# Sample: the first five postings
print("\n4Ô∏è‚É£ √âCHANTILLON D'OFFRES (5 premi√®res)")
print("-" * 60)
for rank, posting in zip(range(1, 6), all_jobs):
    print(f"{rank}. {posting['title']}")
    for label, field in (
        ("Entreprise", "company"),
        ("Localisation", "location"),
        ("Exp√©rience", "experience"),
        ("Salaire", "salary"),
    ):
        print(f"   {label} : {posting[field]}")
    top_skills = ', '.join(posting['requirements'][:3])
    print(f"   Comp√©tences : {top_skills}...")
    print()


üìä ANALYSE DU DATASET

1Ô∏è‚É£ STATISTIQUES G√âN√âRALES
   ‚Ä¢ Nombre total d'offres : 25
   ‚Ä¢ Offres remote : 8 (32.0%)

2Ô∏è‚É£ R√âPARTITION PAR EXP√âRIENCE
   ‚Ä¢ 0-2 ans         :  5 offres
   ‚Ä¢ 1-3 ans         :  5 offres
   ‚Ä¢ 2-4 ans         :  8 offres
   ‚Ä¢ 3-5 ans         :  4 offres
   ‚Ä¢ 4-6 ans         :  2 offres
   ‚Ä¢ 5-7 ans         :  1 offres

3Ô∏è‚É£ R√âPARTITION PAR CAT√âGORIE
   ‚Ä¢ Ml Engineer          : ‚ñà‚ñà‚ñà‚ñà‚ñà 5
   ‚Ä¢ Data Scientist       : ‚ñà‚ñà‚ñà‚ñà‚ñà 5
   ‚Ä¢ Python Developer     : ‚ñà‚ñà‚ñà‚ñà‚ñà 5
   ‚Ä¢ Devops Engineer      : ‚ñà‚ñà‚ñà‚ñà‚ñà 5
   ‚Ä¢ Frontend Developer   : ‚ñà‚ñà‚ñà‚ñà‚ñà 5

4Ô∏è‚É£ √âCHANTILLON D'OFFRES (5 premi√®res)
------------------------------------------------------------
1. Junior ML Engineer
   Entreprise : AI Startup Paris
   Localisation : Toulouse, France
   Exp√©rience : 0-2 ans
   Salaire : 35-45K‚Ç¨
   Comp√©tences : Python (numpy, pandas, scikit-learn), Machine Learning basics (supervised learning), G

In [6]:
# Persist the dataset: one JSON file with everything, plus one file per category.
print("\nüíæ SAUVEGARDE DU DATASET")
print("=" * 60)

# Make sure data/jobs exists
jobs_dir = project_root / "data" / "jobs"
jobs_dir.mkdir(parents=True, exist_ok=True)

# Complete dataset: metadata header followed by all postings
output_path = jobs_dir / "jobs_dataset.json"

metadata = {
    "total_jobs": len(all_jobs),
    "generated_date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    "categories": list(JOB_TEMPLATES.keys()),
    "description": "Dataset d'offres d'emploi fictives pour matching CV"
}
dataset = {"metadata": metadata, "jobs": all_jobs}

with output_path.open('w', encoding='utf-8') as dataset_file:
    # ensure_ascii=False keeps accented characters readable in the file
    json.dump(dataset, dataset_file, indent=2, ensure_ascii=False)

size_kb = output_path.stat().st_size / 1024
print(f"‚úÖ Dataset sauvegard√© : {output_path}")
print(f"   ‚Ä¢ Taille : {size_kb:.1f} KB")

# Optional per-category files
print("\nüíæ Sauvegarde par cat√©gorie...")

for category in JOB_TEMPLATES:
    category_jobs = list(filter(lambda posting: posting['category'] == category, all_jobs))
    category_path = jobs_dir / f"jobs_{category}.json"
    category_payload = {
        "category": category,
        "total": len(category_jobs),
        "jobs": category_jobs
    }

    with category_path.open('w', encoding='utf-8') as category_file:
        json.dump(category_payload, category_file, indent=2, ensure_ascii=False)

    print(f"   ‚úÖ {category:20s} : {len(category_jobs)} offres ‚Üí {category_path.name}")

print("\n‚úÖ G√©n√©ration termin√©e avec succ√®s !")
print(f"\nüìÇ Fichiers cr√©√©s dans : {jobs_dir}")
for created_file in (
    "jobs_dataset.json (dataset complet)",
    "jobs_ml_engineer.json",
    "jobs_data_scientist.json",
    "jobs_python_developer.json",
    "jobs_devops_engineer.json",
    "jobs_frontend_developer.json",
):
    print(f"   ‚Ä¢ {created_file}")


üíæ SAUVEGARDE DU DATASET
‚úÖ Dataset sauvegard√© : c:\Users\rober\OneDrive\Bureau\PFE\data\jobs\jobs_dataset.json
   ‚Ä¢ Taille : 28.2 KB

üíæ Sauvegarde par cat√©gorie...
   ‚úÖ ml_engineer          : 5 offres ‚Üí jobs_ml_engineer.json
   ‚úÖ data_scientist       : 5 offres ‚Üí jobs_data_scientist.json
   ‚úÖ python_developer     : 5 offres ‚Üí jobs_python_developer.json
   ‚úÖ devops_engineer      : 5 offres ‚Üí jobs_devops_engineer.json
   ‚úÖ frontend_developer   : 5 offres ‚Üí jobs_frontend_developer.json

‚úÖ G√©n√©ration termin√©e avec succ√®s !

üìÇ Fichiers cr√©√©s dans : c:\Users\rober\OneDrive\Bureau\PFE\data\jobs
   ‚Ä¢ jobs_dataset.json (dataset complet)
   ‚Ä¢ jobs_ml_engineer.json
   ‚Ä¢ jobs_data_scientist.json
   ‚Ä¢ jobs_python_developer.json
   ‚Ä¢ jobs_devops_engineer.json
   ‚Ä¢ jobs_frontend_developer.json


In [None]:
# Final check: reload the saved dataset and validate its structure.
print("\n‚úÖ V√âRIFICATION FINALE")
print("=" * 60)

# Location of the generated files
jobs_dir = project_root / "data" / "jobs"
dataset_path = jobs_dir / "jobs_dataset.json"

if dataset_path.exists():
    print("‚úÖ jobs_dataset.json cr√©√©")

    # Reload from disk so the check covers what was actually written
    with open(dataset_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    print(f"   ‚Ä¢ {data['metadata']['total_jobs']} offres")
    print(f"   ‚Ä¢ G√©n√©r√© le {data['metadata']['generated_date']}")

    # Structure check
    required_fields = [
        'job_id', 'title', 'company', 'location', 'type',
        'experience', 'salary', 'description', 'requirements',
        'nice_to_have', 'posted_date', 'url'
    ]

    # Validate every posting, not just the first one: a field is reported as
    # missing as soon as one posting lacks it. An empty dataset provides none
    # of the fields, so all of them are reported instead of raising IndexError.
    loaded_jobs = data['jobs']
    missing_fields = [
        field for field in required_fields
        if not loaded_jobs or any(field not in job for job in loaded_jobs)
    ]

    if not missing_fields:
        print("‚úÖ Structure des offres valide")
    else:
        print(f"‚ö†Ô∏è  Champs manquants : {missing_fields}")

    # Final summary
    print(f"\nüìä R√âSUM√â FINAL")
    print(f"   ‚Ä¢ Offres g√©n√©r√©es : {len(data['jobs'])}")
    print(f"   ‚Ä¢ Cat√©gories : {len(data['metadata']['categories'])}")
    print(f"   ‚Ä¢ Pr√™t pour l'√âtape 5 (Matching CV ‚Üî Offres)")

else:
    print("‚ùå Dataset non cr√©√©")


‚úÖ V√âRIFICATION FINALE
‚úÖ jobs_dataset.json cr√©√©
   ‚Ä¢ 25 offres
   ‚Ä¢ G√©n√©r√© le 2026-01-25 18:21:43
‚úÖ Structure des offres valide

üìä R√âSUM√â FINAL
   ‚Ä¢ Offres g√©n√©r√©es : 25
   ‚Ä¢ Cat√©gories : 5
   ‚Ä¢ Pr√™t pour l'√âtape 5 (Matching CV ‚Üî Offres)

üéâ √âtape 4 termin√©e avec succ√®s !
üìù Prochaine √©tape : Matching CV ‚Üî Offres (Notebook 05)
