In [1]:
import sys
sys.path.append('../')

from src.jsearch_scraper import JSearchScraper
from src.data_manager import DataManager
from config import SEARCH_QUERIES

In [2]:
# Check that the API credentials are configured
from config import RAPIDAPI_KEY, RAPIDAPI_HOST

if not RAPIDAPI_KEY:
    # Falsy key (missing or empty): tell the user to create the .env file
    print("❌ ERREUR: Créez un fichier .env avec RAPIDAPI_KEY")
else:
    print("✓ Clé API configurée")
    print(f"Host: {RAPIDAPI_HOST}")

✓ Clé API configurée
Host: jsearch.p.rapidapi.com


In [3]:
# Run the scraper
scraper = JSearchScraper()

# Collect one page of offers per query (adapt the list to your needs).
# NOTE(review): the recorded statistics below list only US locations although
# every query asks for "France" -- confirm how search_jobs forwards the location.
for query in ("Python developer", "Data scientist", "Frontend developer"):
    scraper.search_jobs(query, "France", pages=1)

print(f"\n📊 Total offres collectées: {len(scraper.get_jobs())}")


🔍 Recherche: Python developer - France
   Page 1: 10 offres trouvées

🔍 Recherche: Data scientist - France
   Page 1: 10 offres trouvées

🔍 Recherche: Frontend developer - France
   Page 1: 10 offres trouvées

📊 Total offres collectées: 30


In [4]:
# Show the scraping statistics.
# Per the recorded output, this prints the total number of offers, the error
# count, and the top 5 companies and locations.
scraper.get_stats()


STATISTIQUES DU SCRAPING
Total offres scrappées: 30
Erreurs: 0

Top 5 entreprises:
  - Walmart: 2 offres
  - KATMAI: 1 offres
  - DRC Systems: 1 offres
  - Pyramid Consulting, Inc: 1 offres
  - Realign LLC: 1 offres

Top 5 localisations:
  - Washington, DC: 12 offres
  - McLean, VA: 6 offres
  - Bethesda, MD: 2 offres
  - Alexandria, VA: 2 offres
  - Annapolis, MD: 2 offres


In [5]:
# Save the collected offers.
# Per the recorded output, they are written to data/scraped_jobs.json inside
# the project folder.
scraper.save_to_json()


✓ 30 offres sauvegardées dans C:\Users\ronaldo.heritiana\Documents\documents\Livre\job\job-recommendation-system\data\scraped_jobs.json


In [6]:
# Load the profiles and the scraped offers that the matcher will work on
dm = DataManager()
dm.load_profiles()
dm.load_scraped_jobs()  # scraped offers instead of the sample ones

# Report how many profiles and offers are available
profile_count = len(dm.profiles)
job_count = len(dm.jobs)
print(f"\nProfils: {profile_count}")
print(f"Offres: {job_count}")

# Preview the first three offers
print("\nPremières offres scrappées:")
for offer in dm.jobs[:3]:
    print(f"  - {offer['title']} @ {offer['company']} ({offer['location']})")

✓ 2 profils chargés
✓ 30 offres scrappées chargées

Profils: 2
Offres: 30

Premières offres scrappées:
  - Python Developer (SAS Experience) - REMOTE @ KATMAI (Washington, DC)
  - Full Stack Developer - Python @ DRC Systems (McLean, VA)
  - Python Fullstack Developer( Python ,Angular and AWS) @ Pyramid Consulting, Inc (McLean, VA)


In [9]:
# Try the matcher against the real scraped data
from src.matcher import SimpleMatcher
from src.utils import print_recommendations

# This instance is not used in this cell; the score-debugging cell further
# down calls its calculate_similarity / add_score_filtering methods.
matcher = SimpleMatcher()

# Disabled run for the first profile, kept for reference:
# profile = dm.profiles[0]
# recommendations = matcher.recommend(profile, dm.jobs)

# print_recommendations(profile['name'], recommendations)

# Second attempt, on the second profile, with a separate matcher instance.
# NOTE(review): presumably a fresh SimpleMatcher is created to avoid state left
# by an earlier run -- confirm whether `matcher` could simply be reused.
matcher_new = SimpleMatcher()
profile = dm.profiles[1]
recommendations = matcher_new.recommend(profile, dm.jobs)
print_recommendations(profile['name'], recommendations)


Recommandations pour Marie Martin

1. Staff Software Engineer (Frontend) - Twenty
   Location: Washington, DC
   Salary: None - None
   Score de match: 8.76%
   URL: https://www.linkedin.com/jobs/view/staff-software-engineer-frontend-at-twenty-4311059453?utm_campaign=google_jobs_apply&utm_source=google_jobs_apply&utm_medium=organic


In [16]:
# Debug cell: (1) per-offer score breakdown for the first profile,
# (2) skills extracted from the offers, (3) filtering with a reduced threshold.
#
# Relies on `dm` and `matcher` created in earlier cells.

# Imported up front so the statistics report the configured threshold rather
# than a hard-coded value.
# NOTE(review): presumably this is the threshold SimpleMatcher applies -- confirm.
from config import MIN_MATCH_SCORE

# Lower cut-off used only by section 3; MIN_MATCH_SCORE itself is never modified.
REDUCED_THRESHOLD = 0.1

# --- Debug 1: detailed score analysis -------------------------------------
print("ANALYSE DÉTAILLÉE DES SCORES")
print("="*80)

profile = dm.profiles[0]
print(f"\n👤 Profil: {profile['name']}")
print(f"   Compétences: {profile['skills']}")
print(f"   Keywords: {profile['keywords']}")
print(f"   Localisation: {profile['desired_location']}")

# Score every offer on its own.
# NOTE(review): each offer goes through a separate calculate_similarity call;
# if the matcher fits its vectorizer per call, these values may differ from
# what recommend() computes over the whole list -- confirm.
all_scores = []
for job in dm.jobs:
    similarities = matcher.calculate_similarity(profile, [job])
    base_score = similarities[0]
    final_score = matcher.add_score_filtering(profile, job, base_score)

    all_scores.append({
        'title': job['title'],
        'company': job['company'],
        'location': job['location'],
        'base_score': base_score,
        'final_score': final_score,
        # Tolerate a missing or None description instead of failing on the slice
        'description': (job.get('description') or '')[:100] + '...'
    })

# Highest final score first
all_scores.sort(key=lambda x: x['final_score'], reverse=True)

# Show the top 10
print("\n📊 TOP 10 offres par score:")
print("-"*80)
for i, score_data in enumerate(all_scores[:10], 1):
    print(f"{i}. {score_data['title']}")
    print(f"   Company: {score_data['company']}")
    print(f"   Location: {score_data['location']}")
    print(f"   Score de base (TF-IDF): {score_data['base_score']:.2%}")
    print(f"   Score final (+ filtres): {score_data['final_score']:.2%}")
    print()

# Summary statistics
print("\n📈 STATISTIQUES DES SCORES:")
print("-"*80)
scores_values = [s['final_score'] for s in all_scores]
if scores_values:
    print(f"Score minimum: {min(scores_values):.2%}")
    print(f"Score maximum: {max(scores_values):.2%}")
    print(f"Score moyen: {sum(scores_values)/len(scores_values):.2%}")
else:
    # min()/max() raise ValueError and the mean divides by zero on an empty list
    print("Aucun score à analyser")
print(f"Seuil actuel: {MIN_MATCH_SCORE:.2%}")
print(f"Offres au-dessus du seuil: {sum(1 for s in scores_values if s >= MIN_MATCH_SCORE)}")


# --- Debug 2: skills extracted from the offers ----------------------------
print("\n\nANALYSE DES COMPÉTENCES EXTRAITES")
print("="*80)

print(f"\nCompétences du profil: {profile['skills']}")
print("\nCompétences trouvées dans les offres:")

for job in dm.jobs[:5]:  # only the first five offers
    skills = job.get('required_skills', [])
    print(f"\n{job['title']}:")
    print(f"  Compétences extraites: {skills or 'Aucune'}")


# --- Debug 3: filter the scored offers with the reduced threshold ---------
print("\n\nTEST AVEC SEUIL MINIMUM RÉDUIT")
print("="*80)

# NOTE(review): this rebinds `recommendations` (set by the matcher cell) to a
# list of score dicts from `all_scores`, which already is sorted best-first.
recommendations = [r for r in all_scores if r['final_score'] >= REDUCED_THRESHOLD]

print(f"\nAvec un seuil de {REDUCED_THRESHOLD} ({len(recommendations)} offres):")
for i, rec in enumerate(recommendations[:5], 1):
    print(f"{i}. {rec['title']} - {rec['company']}")
    print(f"   Score: {rec['final_score']:.2%}\n")

ANALYSE DÉTAILLÉE DES SCORES

👤 Profil: Jean Dupont
   Compétences: ['Python', 'Machine Learning', 'Data Analysis']
   Keywords: développeur IA data science
   Localisation: Paris

📊 TOP 10 offres par score:
--------------------------------------------------------------------------------
1. Data Scientist (Senior)
   Company: Spry Methods, Inc
   Location: Washington, DC
   Score de base (TF-IDF): 16.36%
   Score final (+ filtres): 16.36%

2. Data Scientist - Mid level
   Company: Nalley Consulting
   Location: Washington, DC
   Score de base (TF-IDF): 16.01%
   Score final (+ filtres): 16.01%

3. Data Scientist SME
   Company: Leidos
   Location: Alexandria, VA
   Score de base (TF-IDF): 15.26%
   Score final (+ filtres): 15.26%

4. Data Scientist   Identity Focus
   Company: PlanIT Group, LLC
   Location: Falls Church, VA
   Score de base (TF-IDF): 9.45%
   Score final (+ filtres): 9.45%

5. Data Scientist, Mid level
   Company: Planet Technologies
   Location: Washington, DC
   Scor