Pour MAC

In [9]:
# Étape 1 : Import des bibliothèques
import requests
import sqlite3
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime
import time

In [11]:


# 🔧 Configuration de base
config = {
    "keywords": ["data analyst", "data scientist", "business analyst", "VBA"],
    "location": "Nantes",
    "contract_types": ["CDI", "CDD", "Alternance"]
}

KEYWORDS = [k.lower() for k in config['keywords']]
LOCATION = config['location']
CONTRACTS = [c.lower() for c in config['contract_types']]

# 🔧 Initialisation de la base de données SQLite
conn = sqlite3.connect('jobs.db')
c = conn.cursor()
c.execute('''CREATE TABLE IF NOT EXISTS jobs (title TEXT, link TEXT UNIQUE, date TEXT)''')

# Fonction pour vérifier les critères de mots-clés et types de contrat
def matches_criteria(text):
    text = text.lower()
    return any(k in text for k in KEYWORDS) and any(c in text for c in CONTRACTS)

# 🔎 Scraper Indeed
def search_indeed():
    results = []
    headers = {"User-Agent": "Mozilla/5.0"}
    for keyword in KEYWORDS:
        url = f"https://www.indeed.fr/jobs?q={keyword.replace(' ', '+')}&l={LOCATION}"
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')
        for a in soup.find_all("a", href=True):
            href = a['href']
            title = a.get_text(strip=True)
            if href.startswith("/rc") and matches_criteria(title):
                link = "https://www.indeed.fr" + href
                results.append((title, link))
    return results

# 🔎 Scraper HelloWork
def search_hellowork():
    results = []
    headers = {"User-Agent": "Mozilla/5.0"}
    for keyword in KEYWORDS:
        url = f"https://www.hellowork.com/fr-fr/emploi/recherche.html?k={keyword.replace(' ', '+')}&l={LOCATION}"
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')
        for a in soup.find_all("a", href=True):
            href = a['href']
            title = a.get_text(strip=True)
            if "/emplois/" in href and matches_criteria(title):
                full_link = href if href.startswith("http") else "https://www.hellowork.com" + href
                results.append((title, full_link))
        time.sleep(1)
    return results

# 🔎 Scraper WelcomeToTheJungle
def search_wttj():
    results = []
    headers = {"User-Agent": "Mozilla/5.0"}
    for keyword in KEYWORDS:
        url = f"https://www.welcometothejungle.com/fr/jobs?query={keyword.replace(' ', '%20')}&aroundQuery={LOCATION}"
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')
        for a in soup.find_all("a", href=True):
            href = a['href']
            title = a.get_text(strip=True)
            if "/fr/companies/" in href and matches_criteria(title):
                full_link = href if href.startswith("http") else "https://www.welcometothejungle.com" + href
                results.append((title, full_link))
        time.sleep(1)
    return results

# 🔎 Scraper LinkedIn
def search_linkedin():
    results = []
    headers = {"User-Agent": "Mozilla/5.0"}
    for keyword in KEYWORDS:
        url = f"https://www.linkedin.com/jobs/search?keywords={keyword.replace(' ', '%20')}&location={LOCATION}"
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')
        for a in soup.find_all("a", href=True):
            href = a['href']
            title = a.get_text(strip=True)
            if "/jobs/view/" in href and matches_criteria(title):
                full_link = href if href.startswith("http") else "https://www.linkedin.com" + href
                results.append((title, full_link))
        time.sleep(1)
    return results

# 🔎 Scraper Monster
def search_monster():
    results = []
    headers = {"User-Agent": "Mozilla/5.0"}
    for keyword in KEYWORDS:
        url = f"https://www.monster.fr/jobs/search?q={keyword.replace(' ', '%20')}&l={LOCATION}"
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')
        for a in soup.find_all("a", href=True):
            href = a['href']
            title = a.get_text(strip=True)
            if "/offre" in href and matches_criteria(title):
                full_link = href if href.startswith("http") else "https://www.monster.fr" + href
                results.append((title, full_link))
        time.sleep(1)
    return results

# 🔎 Scraper Pôle Emploi
def search_pole_emploi():
    results = []
    headers = {"User-Agent": "Mozilla/5.0"}
    for keyword in KEYWORDS:
        url = f"https://www.pole-emploi.fr/l'emploi-et-la-formation/recherche-emploi.html?keywords={keyword.replace(' ', '%20')}&location={LOCATION}"
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')
        for a in soup.find_all("a", href=True):
            href = a['href']
            title = a.get_text(strip=True)
            if "/offre" in href and matches_criteria(title):
                full_link = href if href.startswith("http") else "https://www.pole-emploi.fr" + href
                results.append((title, full_link))
        time.sleep(1)
    return results

# 🔎 Scraper APEC
def search_apec():
    results = []
    headers = {"User-Agent": "Mozilla/5.0"}
    for keyword in KEYWORDS:
        url = f"https://www.apec.fr/candidat/recherche-offres-emploi.html?keywords={keyword.replace(' ', '%20')}&location={LOCATION}"
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')
        for a in soup.find_all("a", href=True):
            href = a['href']
            title = a.get_text(strip=True)
            if "/offre" in href and matches_criteria(title):
                full_link = href if href.startswith("http") else "https://www.apec.fr" + href
                results.append((title, full_link))
        time.sleep(1)
    return results

# 📂 Filtrer les nouvelles offres
def filter_new_jobs(jobs):
    new_jobs = []
    for title, link in jobs:
        try:
            c.execute("INSERT INTO jobs (title, link, date) VALUES (?, ?, ?)", (title, link, str(datetime.now())))
            new_jobs.append((title, link))
        except sqlite3.IntegrityError:
            continue
    conn.commit()
    return new_jobs

# 🚀 Lancer les scrapers pour tous les sites
indeed_jobs = search_indeed()
hellowork_jobs = search_hellowork()
wttj_jobs = search_wttj()
linkedin_jobs = search_linkedin()
monster_jobs = search_monster()
pole_emploi_jobs = search_pole_emploi()
apec_jobs = search_apec()

all_jobs = (indeed_jobs + hellowork_jobs + wttj_jobs + linkedin_jobs +
            monster_jobs + pole_emploi_jobs + apec_jobs)

new_jobs = filter_new_jobs(all_jobs)

# 💾 Sauvegarde des résultats dans Excel et visualisation
print("Nombre d'offres trouvées :", len(all_jobs))
print("Nouvelles offres enregistrées :", len(new_jobs))

pd.set_option("display.max_colwidth", None)
df = pd.DataFrame(new_jobs, columns=["Titre", "Lien"])
display(df)

# 🔥 Sauvegarde dans un fichier Excel
now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
df.to_excel(f"offres_jobbot_{now}.xlsx", index=False)
print(f"✅ Fichier Excel créé : offres_jobbot_{now}.xlsx")

# Fermeture de la connexion à la base de données
conn.close()


Nombre d'offres trouvées : 19
Nouvelles offres enregistrées : 13


Unnamed: 0,Titre,Lien
0,Alternance- Data Analyst Junior (H/F),https://fr.linkedin.com/jobs/view/alternance-data-analyst-junior-h-f-at-totalenergies-4184827352?position=1&pageNum=0&refId=5PYegRkgRptX1XCA5PR1IA%3D%3D&trackingId=Cpis3QuuSXoZRi6qzmcvbA%3D%3D
1,Data Analyst en alternance F/H,https://fr.linkedin.com/jobs/view/data-analyst-en-alternance-f-h-at-kereis-4219678903?position=2&pageNum=0&refId=5PYegRkgRptX1XCA5PR1IA%3D%3D&trackingId=iQeNXqdEl%2Ft20J1jBqZfWQ%3D%3D
2,Data Analyst en Alternance H/F,https://fr.linkedin.com/jobs/view/data-analyst-en-alternance-h-f-at-kereis-france-4219552443?position=3&pageNum=0&refId=5PYegRkgRptX1XCA5PR1IA%3D%3D&trackingId=A5HhxXzkJBZ50IPiuvIFfQ%3D%3D
3,Data Analyst en alternance F/H,https://fr.linkedin.com/jobs/view/data-analyst-en-alternance-f-h-at-kereis-solutions-4218577503?position=5&pageNum=0&refId=5PYegRkgRptX1XCA5PR1IA%3D%3D&trackingId=QXMmapk7oKRE6CMRgJnOXQ%3D%3D
4,Data Analyst -CDI- Nantes,https://fr.linkedin.com/jobs/view/data-analyst-cdi-nantes-at-free-work-4219582682?position=8&pageNum=0&refId=5PYegRkgRptX1XCA5PR1IA%3D%3D&trackingId=RI%2FuYnQEicGiQlJbglhbZw%3D%3D
5,ALTERNANCE - DATA ANALYST (F/H) - NANTES,https://fr.linkedin.com/jobs/view/alternance-data-analyst-f-h-nantes-at-bpce-solutions-informatiques-4181921624?position=11&pageNum=0&refId=5PYegRkgRptX1XCA5PR1IA%3D%3D&trackingId=X5PjjtlzFu9VJNtBhQMI%2Bw%3D%3D
6,Data Analyst en alternance F/H - Kereis (H/F),https://fr.linkedin.com/jobs/view/data-analyst-en-alternance-f-h-kereis-h-f-at-kereis-4219275985?position=14&pageNum=0&refId=5PYegRkgRptX1XCA5PR1IA%3D%3D&trackingId=8vNtVjLnPqE6ZRNvLZ1cww%3D%3D
7,Alternance - Data Analyst H/F,https://fr.linkedin.com/jobs/view/alternance-data-analyst-h-f-at-ikks-4213549197?position=28&pageNum=0&refId=5PYegRkgRptX1XCA5PR1IA%3D%3D&trackingId=1qDD7Z5R6Iy31EmCPfKEKA%3D%3D
8,CDI Charge d'Etudes CRM - Data Analyst H/F,https://fr.linkedin.com/jobs/view/cdi-charge-d-etudes-crm-data-analyst-h-f-at-ikks-4209975470?position=44&pageNum=0&refId=5PYegRkgRptX1XCA5PR1IA%3D%3D&trackingId=jnsEE2s3xufb1sxWLF6GZQ%3D%3D
9,Business Analyst en alternance F/H,https://fr.linkedin.com/jobs/view/business-analyst-en-alternance-f-h-at-kereis-4212960331?position=50&pageNum=0&refId=hdlwCVij7fr67xb3R4hMFQ%3D%3D&trackingId=WGM8To5bj2BgYpNDVdxQ7Q%3D%3D


✅ Fichier Excel créé : offres_jobbot_2025-05-10_12-18-21.xlsx


Pour windows

In [13]:
# Étape 1 : Import des bibliothèques
import requests
import sqlite3
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime
import time

In [14]:
# 🔍 JobBot en version Jupyter Notebook (version Windows avec sauvegarde Excel uniquement)



# Étape 2 : Charger la configuration
config = {
    "keywords": ["data analyst", "data scientist", "business analyst", "VBA"],
    "location": "Nantes",
    "contract_types": ["CDI", "CDD", "Alternance"]
}

KEYWORDS = [k.lower() for k in config['keywords']]
LOCATION = config['location']
CONTRACTS = [c.lower() for c in config['contract_types']]

# Étape 3 : Initialiser la base de données SQLite
conn = sqlite3.connect('jobs_windows.db')
c = conn.cursor()
c.execute('''CREATE TABLE IF NOT EXISTS jobs (title TEXT, link TEXT UNIQUE, date TEXT)''')

# Fonction pour vérifier mots-clés et types de contrat
def matches_criteria(text):
    text = text.lower()
    return any(k in text for k in KEYWORDS) and any(c in text for c in CONTRACTS)

# Étape 4 : Scraper Indeed
def search_indeed():
    results = []
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
    for keyword in KEYWORDS:
        url = f"https://www.indeed.fr/jobs?q={keyword.replace(' ', '+')}&l={LOCATION}"
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')
        for a in soup.find_all("a", href=True):
            href = a['href']
            title = a.get_text(strip=True)
            if href.startswith("/rc") and matches_criteria(title):
                link = "https://www.indeed.fr" + href
                results.append((title, link))
    return results

# Étape 4 bis : Scraper HelloWork
def search_hellowork():
    results = []
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
    for keyword in KEYWORDS:
        url = f"https://www.hellowork.com/fr-fr/emploi/recherche.html?k={keyword.replace(' ', '+')}&l={LOCATION}"
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')
        for a in soup.find_all("a", href=True):
            href = a['href']
            title = a.get_text(strip=True)
            if "/emplois/" in href and matches_criteria(title):
                full_link = href if href.startswith("http") else "https://www.hellowork.com" + href
                results.append((title, full_link))
        time.sleep(1)
    return results

# Étape 4 ter : Scraper WelcomeToTheJungle
def search_wttj():
    results = []
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
    for keyword in KEYWORDS:
        url = f"https://www.welcometothejungle.com/fr/jobs?query={keyword.replace(' ', '%20')}&aroundQuery={LOCATION}"
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')
        for a in soup.find_all("a", href=True):
            href = a['href']
            title = a.get_text(strip=True)
            if "/fr/companies/" in href and matches_criteria(title):
                full_link = href if href.startswith("http") else "https://www.welcometothejungle.com" + href
                results.append((title, full_link))
        time.sleep(1)
    return results

# Étape 4 quater : Scraper LinkedIn
def search_linkedin():
    results = []
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
    for keyword in KEYWORDS:
        url = f"https://www.linkedin.com/jobs/search?keywords={keyword.replace(' ', '%20')}&location={LOCATION}"
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')
        for a in soup.find_all("a", href=True):
            href = a['href']
            title = a.get_text(strip=True)
            if "/jobs/view/" in href and matches_criteria(title):
                full_link = href if href.startswith("http") else "https://www.linkedin.com" + href
                results.append((title, full_link))
        time.sleep(1)
    return results

# Étape 5 : Filtrer les offres non encore vues
def filter_new_jobs(jobs):
    new_jobs = []
    for title, link in jobs:
        try:
            c.execute("INSERT INTO jobs (title, link, date) VALUES (?, ?, ?)", (title, link, str(datetime.now())))
            new_jobs.append((title, link))
        except sqlite3.IntegrityError:
            continue
    conn.commit()
    return new_jobs

# Étape 6 : Lancer le bot et sauvegarder en Excel
indeed_jobs = search_indeed()
hellowork_jobs = search_hellowork()
wttj_jobs = search_wttj()
linkedin_jobs = search_linkedin()

all_jobs = indeed_jobs + hellowork_jobs + wttj_jobs + linkedin_jobs
new_jobs = filter_new_jobs(all_jobs)

# Étape 7 : Visualiser dans le notebook et sauvegarder en Excel
print("Nombre d'offres trouvées :", len(all_jobs))
print("Nouvelles offres enregistrées :", len(new_jobs))

pd.set_option("display.max_colwidth", None)
df = pd.DataFrame(new_jobs, columns=["Titre", "Lien"])
display(df)

# 🔥 Sauvegarde dans un fichier Excel
now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
df.to_excel(f"offres_jobbot_windows_{now}.xlsx", index=False)
print(f"✅ Fichier Excel créé : offres_jobbot_windows_{now}.xlsx")

Nombre d'offres trouvées : 19
Nouvelles offres enregistrées : 13


Unnamed: 0,Titre,Lien
0,Alternance- Data Analyst Junior (H/F),https://fr.linkedin.com/jobs/view/alternance-data-analyst-junior-h-f-at-totalenergies-4184827352?position=1&pageNum=0&refId=vYWtifRmw5LXT5dtSltdrg%3D%3D&trackingId=nof0skfajV3kspxaI1ZYow%3D%3D
1,Data Analyst en alternance F/H,https://fr.linkedin.com/jobs/view/data-analyst-en-alternance-f-h-at-kereis-4219678903?position=2&pageNum=0&refId=vYWtifRmw5LXT5dtSltdrg%3D%3D&trackingId=QJw%2BocfMlktTLomMGXICnA%3D%3D
2,Data Analyst en Alternance H/F,https://fr.linkedin.com/jobs/view/data-analyst-en-alternance-h-f-at-kereis-france-4219552443?position=3&pageNum=0&refId=vYWtifRmw5LXT5dtSltdrg%3D%3D&trackingId=QRB4ofn3a3dqXloruYp5pQ%3D%3D
3,Data Analyst en alternance F/H,https://fr.linkedin.com/jobs/view/data-analyst-en-alternance-f-h-at-kereis-solutions-4218577503?position=5&pageNum=0&refId=vYWtifRmw5LXT5dtSltdrg%3D%3D&trackingId=PtLASEmkYsWN%2BPXDTBOQHg%3D%3D
4,Data Analyst -CDI- Nantes,https://fr.linkedin.com/jobs/view/data-analyst-cdi-nantes-at-free-work-4219582682?position=8&pageNum=0&refId=vYWtifRmw5LXT5dtSltdrg%3D%3D&trackingId=im72Dz8v4d4CfpR0nJ7Zeg%3D%3D
5,ALTERNANCE - DATA ANALYST (F/H) - NANTES,https://fr.linkedin.com/jobs/view/alternance-data-analyst-f-h-nantes-at-bpce-solutions-informatiques-4181921624?position=11&pageNum=0&refId=vYWtifRmw5LXT5dtSltdrg%3D%3D&trackingId=cupW0jXHIol1UvZbzePRnQ%3D%3D
6,Data Analyst en alternance F/H - Kereis (H/F),https://fr.linkedin.com/jobs/view/data-analyst-en-alternance-f-h-kereis-h-f-at-kereis-4219275985?position=14&pageNum=0&refId=vYWtifRmw5LXT5dtSltdrg%3D%3D&trackingId=Uw0d5bPKfQ5CgmcwP%2FwXMw%3D%3D
7,Alternance - Data Analyst H/F,https://fr.linkedin.com/jobs/view/alternance-data-analyst-h-f-at-ikks-4213549197?position=28&pageNum=0&refId=vYWtifRmw5LXT5dtSltdrg%3D%3D&trackingId=WNs5m6fpBWGJQpYY6oS02w%3D%3D
8,CDI Charge d'Etudes CRM - Data Analyst H/F,https://fr.linkedin.com/jobs/view/cdi-charge-d-etudes-crm-data-analyst-h-f-at-ikks-4209975470?position=44&pageNum=0&refId=vYWtifRmw5LXT5dtSltdrg%3D%3D&trackingId=vmV6YtgxDvk7OJN1AsXGfg%3D%3D
9,Business Analyst en alternance F/H,https://fr.linkedin.com/jobs/view/business-analyst-en-alternance-f-h-at-kereis-4212960331?position=49&pageNum=0&refId=JTdXGbeBgZZ0%2FLi5TYE%2F0Q%3D%3D&trackingId=Wp0IKucgjw3bn8ecobagAw%3D%3D


✅ Fichier Excel créé : offres_jobbot_windows_2025-05-10_12-18-41.xlsx


In [15]:
import requests
import sqlite3
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime
import time

# Étape 2 : Charger la configuration
config = {
    "keywords": ["data analyst", "data scientist", "business analyst", "VBA"],
    "location": "Nantes",
    "contract_types": ["CDI", "CDD", "Alternance"]
}

KEYWORDS = [k.lower() for k in config['keywords']]
LOCATION = config['location']
CONTRACTS = [c.lower() for c in config['contract_types']]

# Étape 3 : Initialiser la base de données SQLite
conn = sqlite3.connect('jobs_windows.db')
c = conn.cursor()
c.execute('''CREATE TABLE IF NOT EXISTS jobs (title TEXT, link TEXT UNIQUE, date TEXT)''')

# Fonction pour vérifier mots-clés et types de contrat
def matches_criteria(text):
    text = text.lower()
    return any(k in text for k in KEYWORDS) and any(c in text for c in CONTRACTS)

# Étape 4 : Scraper Indeed
def search_indeed():
    results = []
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
    }
    for keyword in KEYWORDS:
        url = f"https://www.indeed.fr/jobs?q={keyword.replace(' ', '+')}&l={LOCATION}"
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()  # Vérifie s'il y a une erreur dans la réponse
            soup = BeautifulSoup(response.text, 'html.parser')
            for a in soup.find_all("a", href=True):
                href = a['href']
                title = a.get_text(strip=True)
                if href.startswith("/rc") and matches_criteria(title):
                    link = "https://www.indeed.fr" + href
                    results.append((title, link))
            time.sleep(1)  # Ajouter un délai pour limiter les requêtes
        except requests.exceptions.RequestException as e:
            print(f"Erreur lors de la récupération de {url}: {e}")
            continue
    return results

# Étape 4 bis : Scraper HelloWork
def search_hellowork():
    results = []
    headers = {"User-Agent": "Mozilla/5.0"}
    for keyword in KEYWORDS:
        url = f"https://www.hellowork.com/fr-fr/emploi/recherche.html?k={keyword.replace(' ', '+')}&l={LOCATION}"
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            for a in soup.find_all("a", href=True):
                href = a['href']
                title = a.get_text(strip=True)
                if "/emplois/" in href and matches_criteria(title):
                    full_link = href if href.startswith("http") else "https://www.hellowork.com" + href
                    results.append((title, full_link))
            time.sleep(1)
        except requests.exceptions.RequestException as e:
            print(f"Erreur lors de la récupération de {url}: {e}")
            continue
    return results

# Étape 4 ter : Scraper WelcomeToTheJungle
def search_wttj():
    results = []
    headers = {"User-Agent": "Mozilla/5.0"}
    for keyword in KEYWORDS:
        url = f"https://www.welcometothejungle.com/fr/jobs?query={keyword.replace(' ', '%20')}&aroundQuery={LOCATION}"
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            for a in soup.find_all("a", href=True):
                href = a['href']
                title = a.get_text(strip=True)
                if "/fr/companies/" in href and matches_criteria(title):
                    full_link = href if href.startswith("http") else "https://www.welcometothejungle.com" + href
                    results.append((title, full_link))
            time.sleep(1)
        except requests.exceptions.RequestException as e:
            print(f"Erreur lors de la récupération de {url}: {e}")
            continue
    return results

# Étape 4 quater : Scraper LinkedIn
def search_linkedin():
    results = []
    headers = {"User-Agent": "Mozilla/5.0"}
    for keyword in KEYWORDS:
        url = f"https://www.linkedin.com/jobs/search?keywords={keyword.replace(' ', '%20')}&location={LOCATION}"
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            for a in soup.find_all("a", href=True):
                href = a['href']
                title = a.get_text(strip=True)
                if "/jobs/view/" in href and matches_criteria(title):
                    full_link = href if href.startswith("http") else "https://www.linkedin.com" + href
                    results.append((title, full_link))
            time.sleep(1)
        except requests.exceptions.RequestException as e:
            print(f"Erreur lors de la récupération de {url}: {e}")
            continue
    return results

# Étape 5 : Filtrer les offres non encore vues
def filter_new_jobs(jobs):
    new_jobs = []
    for title, link in jobs:
        try:
            c.execute("INSERT INTO jobs (title, link, date) VALUES (?, ?, ?)", (title, link, str(datetime.now())))
            new_jobs.append((title, link))
        except sqlite3.IntegrityError:
            continue
    conn.commit()
    return new_jobs

# Étape 6 : Lancer le bot et sauvegarder en Excel
indeed_jobs = search_indeed()
hellowork_jobs = search_hellowork()
wttj_jobs = search_wttj()
linkedin_jobs = search_linkedin()

all_jobs = indeed_jobs + hellowork_jobs + wttj_jobs + linkedin_jobs
new_jobs = filter_new_jobs(all_jobs)

# Étape 7 : Visualiser dans le notebook et sauvegarder en Excel
print("Nombre d'offres trouvées :", len(all_jobs))
print("Nouvelles offres enregistrées :", len(new_jobs))

pd.set_option("display.max_colwidth", None)
df = pd.DataFrame(new_jobs, columns=["Titre", "Lien"])
display(df)

# 🔥 Sauvegarde dans un fichier Excel
now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
df.to_excel(f"offres_jobbot_windows_{now}.xlsx", index=False)
print(f"✅ Fichier Excel créé : offres_jobbot_windows_{now}.xlsx")


Erreur lors de la récupération de https://www.indeed.fr/jobs?q=data+analyst&l=Nantes: 403 Client Error: Forbidden for url: https://fr.indeed.com/jobs
Erreur lors de la récupération de https://www.indeed.fr/jobs?q=data+scientist&l=Nantes: 403 Client Error: Forbidden for url: https://fr.indeed.com/jobs
Erreur lors de la récupération de https://www.indeed.fr/jobs?q=business+analyst&l=Nantes: 403 Client Error: Forbidden for url: https://fr.indeed.com/jobs
Erreur lors de la récupération de https://www.indeed.fr/jobs?q=vba&l=Nantes: 403 Client Error: Forbidden for url: https://fr.indeed.com/jobs
Nombre d'offres trouvées : 19
Nouvelles offres enregistrées : 13


Unnamed: 0,Titre,Lien
0,Alternance- Data Analyst Junior (H/F),https://fr.linkedin.com/jobs/view/alternance-data-analyst-junior-h-f-at-totalenergies-4184827352?position=1&pageNum=0&refId=FOPCjiUb2C0HTJ367025DA%3D%3D&trackingId=B8JbQTV4aRU4tLOi4bFwzg%3D%3D
1,Data Analyst en alternance F/H,https://fr.linkedin.com/jobs/view/data-analyst-en-alternance-f-h-at-kereis-4219678903?position=2&pageNum=0&refId=FOPCjiUb2C0HTJ367025DA%3D%3D&trackingId=LxEI6C%2Bm8oz%2FAz8BtWXB8Q%3D%3D
2,Data Analyst en Alternance H/F,https://fr.linkedin.com/jobs/view/data-analyst-en-alternance-h-f-at-kereis-france-4219552443?position=3&pageNum=0&refId=FOPCjiUb2C0HTJ367025DA%3D%3D&trackingId=3i9AlB%2Fe6zOIf969KSZKag%3D%3D
3,Data Analyst en alternance F/H,https://fr.linkedin.com/jobs/view/data-analyst-en-alternance-f-h-at-kereis-solutions-4218577503?position=5&pageNum=0&refId=FOPCjiUb2C0HTJ367025DA%3D%3D&trackingId=d%2FxxEmHqyd8iK%2BY71H0zDA%3D%3D
4,Data Analyst -CDI- Nantes,https://fr.linkedin.com/jobs/view/data-analyst-cdi-nantes-at-free-work-4219582682?position=8&pageNum=0&refId=FOPCjiUb2C0HTJ367025DA%3D%3D&trackingId=iw5wD%2BnfLzRuFPK7myWCnA%3D%3D
5,ALTERNANCE - DATA ANALYST (F/H) - NANTES,https://fr.linkedin.com/jobs/view/alternance-data-analyst-f-h-nantes-at-bpce-solutions-informatiques-4181921624?position=11&pageNum=0&refId=FOPCjiUb2C0HTJ367025DA%3D%3D&trackingId=WIHxvUvrOiYdkLfyJ5UwgQ%3D%3D
6,Data Analyst en alternance F/H - Kereis (H/F),https://fr.linkedin.com/jobs/view/data-analyst-en-alternance-f-h-kereis-h-f-at-kereis-4219275985?position=14&pageNum=0&refId=FOPCjiUb2C0HTJ367025DA%3D%3D&trackingId=oOSroHxA6anQ7qhpkVJ9hQ%3D%3D
7,Alternance - Data Analyst H/F,https://fr.linkedin.com/jobs/view/alternance-data-analyst-h-f-at-ikks-4213549197?position=28&pageNum=0&refId=FOPCjiUb2C0HTJ367025DA%3D%3D&trackingId=GnuKHrtHXy%2BoPMbpTXMSqw%3D%3D
8,CDI Charge d'Etudes CRM - Data Analyst H/F,https://fr.linkedin.com/jobs/view/cdi-charge-d-etudes-crm-data-analyst-h-f-at-ikks-4209975470?position=44&pageNum=0&refId=FOPCjiUb2C0HTJ367025DA%3D%3D&trackingId=wFveWd6FZq0EMq6tvWe89A%3D%3D
9,Business Analyst en alternance F/H,https://fr.linkedin.com/jobs/view/business-analyst-en-alternance-f-h-at-kereis-4212960331?position=49&pageNum=0&refId=NOsaO%2FS9N62dGSijA15i6w%3D%3D&trackingId=tPiVC8vA%2FU%2F6jC4WOn1Dkw%3D%3D


✅ Fichier Excel créé : offres_jobbot_windows_2025-05-10_12-19-01.xlsx


In [16]:
import requests
import sqlite3
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime
import time

# Étape 2 : Charger la configuration
config = {
    "keywords": ["data analyst", "data scientist", "business analyst", "VBA"],
    "location": "Nantes",
    "contract_types": ["CDI", "CDD", "Alternance"]
}

KEYWORDS = [k.lower() for k in config['keywords']]
LOCATION = config['location']
CONTRACTS = [c.lower() for c in config['contract_types']]

# Étape 3 : Initialiser la base de données SQLite
conn = sqlite3.connect('jobs_windows.db')
c = conn.cursor()
c.execute('''CREATE TABLE IF NOT EXISTS jobs (title TEXT, link TEXT UNIQUE, date TEXT)''')

# Fonction pour vérifier mots-clés et types de contrat
def matches_criteria(text):
    text = text.lower()
    return any(k in text for k in KEYWORDS) and any(c in text for c in CONTRACTS)

# Étape 4 : Scraper Indeed avec session
def search_indeed():
    results = []
    session = requests.Session()  # Utilisation d'une session pour gérer les cookies
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
    }
    session.headers.update(headers)  # Appliquer les headers à la session
    for keyword in KEYWORDS:
        url = f"https://www.indeed.fr/jobs?q={keyword.replace(' ', '+')}&l={LOCATION}"
        try:
            response = session.get(url, timeout=10)
            response.raise_for_status()  # Vérifie s'il y a une erreur dans la réponse
            soup = BeautifulSoup(response.text, 'html.parser')
            for a in soup.find_all("a", href=True):
                href = a['href']
                title = a.get_text(strip=True)
                if href.startswith("/rc") and matches_criteria(title):
                    link = "https://www.indeed.fr" + href
                    results.append((title, link))
            time.sleep(2)  # Ajouter un délai plus long pour limiter les requêtes
        except requests.exceptions.RequestException as e:
            print(f"Erreur lors de la récupération de {url}: {e}")
            continue
    return results

# Autres fonctions pour HelloWork, WelcomeToTheJungle, LinkedIn restent inchangées

# Étape 5 : Filtrer les offres non encore vues
def filter_new_jobs(jobs):
    new_jobs = []
    for title, link in jobs:
        try:
            c.execute("INSERT INTO jobs (title, link, date) VALUES (?, ?, ?)", (title, link, str(datetime.now())))
            new_jobs.append((title, link))
        except sqlite3.IntegrityError:
            continue
    conn.commit()
    return new_jobs

# Étape 6 : Lancer le bot et sauvegarder en Excel
indeed_jobs = search_indeed()
hellowork_jobs = search_hellowork()
wttj_jobs = search_wttj()
linkedin_jobs = search_linkedin()

all_jobs = indeed_jobs + hellowork_jobs + wttj_jobs + linkedin_jobs
new_jobs = filter_new_jobs(all_jobs)

# Étape 7 : Visualiser dans le notebook et sauvegarder en Excel
print("Nombre d'offres trouvées :", len(all_jobs))
print("Nouvelles offres enregistrées :", len(new_jobs))

pd.set_option("display.max_colwidth", None)
df = pd.DataFrame(new_jobs, columns=["Titre", "Lien"])
display(df)

# 🔥 Sauvegarde dans un fichier Excel
now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
df.to_excel(f"offres_jobbot_windows_{now}.xlsx", index=False)
print(f"✅ Fichier Excel créé : offres_jobbot_windows_{now}.xlsx")


Erreur lors de la récupération de https://www.indeed.fr/jobs?q=data+analyst&l=Nantes: 403 Client Error: Forbidden for url: https://fr.indeed.com/jobs
Erreur lors de la récupération de https://www.indeed.fr/jobs?q=data+scientist&l=Nantes: 403 Client Error: Forbidden for url: https://fr.indeed.com/jobs
Erreur lors de la récupération de https://www.indeed.fr/jobs?q=business+analyst&l=Nantes: 403 Client Error: Forbidden for url: https://fr.indeed.com/jobs
Erreur lors de la récupération de https://www.indeed.fr/jobs?q=vba&l=Nantes: 403 Client Error: Forbidden for url: https://fr.indeed.com/jobs
Nombre d'offres trouvées : 16
Nouvelles offres enregistrées : 10


Unnamed: 0,Titre,Lien
0,Alternance- Data Analyst Junior (H/F),https://fr.linkedin.com/jobs/view/alternance-data-analyst-junior-h-f-at-totalenergies-4184827352?position=1&pageNum=0&refId=6LJxXiHnBXMoAHhuZ%2BGCQA%3D%3D&trackingId=gpaMF6CwVplZhi4wiSD57A%3D%3D
1,Data Analyst en alternance F/H,https://fr.linkedin.com/jobs/view/data-analyst-en-alternance-f-h-at-kereis-4219678903?position=2&pageNum=0&refId=6LJxXiHnBXMoAHhuZ%2BGCQA%3D%3D&trackingId=MBTTzwwlijUja6w%2B6oefsQ%3D%3D
2,Data Analyst en alternance F/H,https://fr.linkedin.com/jobs/view/data-analyst-en-alternance-f-h-at-kereis-solutions-4218577503?position=4&pageNum=0&refId=6LJxXiHnBXMoAHhuZ%2BGCQA%3D%3D&trackingId=XVB7xKARmGFaSgqtm0X9%2Fg%3D%3D
3,Data Analyst -CDI- Nantes,https://fr.linkedin.com/jobs/view/data-analyst-cdi-nantes-at-free-work-4219582682?position=6&pageNum=0&refId=6LJxXiHnBXMoAHhuZ%2BGCQA%3D%3D&trackingId=BA2MNwWfrtLvk6FbWy1ACA%3D%3D
4,ALTERNANCE - DATA ANALYST (F/H) - NANTES,https://fr.linkedin.com/jobs/view/alternance-data-analyst-f-h-nantes-at-bpce-solutions-informatiques-4181921624?position=8&pageNum=0&refId=6LJxXiHnBXMoAHhuZ%2BGCQA%3D%3D&trackingId=qv9Z1A5NuTeCEcN4KdIwwA%3D%3D
5,Data Analyst en alternance F/H - Kereis (H/F),https://fr.linkedin.com/jobs/view/data-analyst-en-alternance-f-h-kereis-h-f-at-kereis-4219275985?position=11&pageNum=0&refId=6LJxXiHnBXMoAHhuZ%2BGCQA%3D%3D&trackingId=D2lpxMzwLUNZj9JoL6ndQg%3D%3D
6,Business Analyst en alternance F/H,https://fr.linkedin.com/jobs/view/business-analyst-en-alternance-f-h-at-kereis-4212960331?position=50&pageNum=0&refId=8dQbYIhPSZw4KQUUrExUYQ%3D%3D&trackingId=X%2B2NvV58tJl5oV4%2FDVY9sQ%3D%3D
7,Business Analyst en Alternance H/F,https://fr.linkedin.com/jobs/view/business-analyst-en-alternance-h-f-at-kereis-france-4209974600?position=51&pageNum=0&refId=8dQbYIhPSZw4KQUUrExUYQ%3D%3D&trackingId=D1RWwq2zHyPrFgm9XzCzag%3D%3D
8,Business Analyst en alternance F/H,https://fr.linkedin.com/jobs/view/business-analyst-en-alternance-f-h-at-kereis-solutions-4218041963?position=52&pageNum=0&refId=8dQbYIhPSZw4KQUUrExUYQ%3D%3D&trackingId=jcOc6Bq%2FQhI5BG438DbtDg%3D%3D
9,Alternance - Business Analyst Pricing & Développement Réseau Stations - AS 24,https://fr.linkedin.com/jobs/view/alternance-business-analyst-pricing-d%C3%A9veloppement-r%C3%A9seau-stations-as-24-at-totalenergies-4173784476?position=58&pageNum=0&refId=8dQbYIhPSZw4KQUUrExUYQ%3D%3D&trackingId=j4d8WkV8k0r98oE4rIwbKg%3D%3D


✅ Fichier Excel créé : offres_jobbot_windows_2025-05-10_12-19-21.xlsx
