In [11]:
import os
import time
import requests
from bs4 import BeautifulSoup

from dotenv import load_dotenv

import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

load_dotenv()

True

In [12]:
import logging
from logging.handlers import RotatingFileHandler

# Module logger: everything at DEBUG and above goes to a size-rotated file.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

file_formatter = logging.Formatter(
    "%(asctime)s - %(name)s - %(levelname)s - %(message)s")

# The handler opens its file immediately, so the directory has to exist first;
# without this a fresh checkout fails with FileNotFoundError.
log_dir = os.path.join(os.getcwd(), "..", "logs")
os.makedirs(log_dir, exist_ok=True)

# Re-running this cell would otherwise stack a second handler on the same
# logger and write every record twice. Drop (and close) any rotating file
# handler left over from a previous run before attaching a new one.
for stale_handler in list(logger.handlers):
    if isinstance(stale_handler, RotatingFileHandler):
        logger.removeHandler(stale_handler)
        stale_handler.close()

file_handler = RotatingFileHandler(
    filename=os.path.join(log_dir, "scraper.log"),
    maxBytes=10*1024*1024,  # rotate once the file reaches 10 MiB
    backupCount=5           # keep at most five rotated files
)
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(file_formatter)

logger.addHandler(file_handler)

In [None]:
import time
import traceback


def check_for_new_offers(location, url, timeout=10):
    """Download one search page and notify by e-mail if it lists offers.

    The page counts as having offers when its ``h2.SearchResults-desktop``
    heading exists, does not contain "Aucun logement", and
    ``get_inidividual_appartment_offers_count`` reports at least one card.

    No state is kept between calls: a page that still lists offers on the
    next call triggers another notification.

    Args:
        location: Label used in the log entry and in the e-mail.
        url: Search-results URL to fetch.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        None. Network errors and non-200 responses are logged as warnings
        and otherwise ignored so that the caller can keep polling.
    """
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        logger.warning("Request for %s failed: %s", location, exc)
        return

    if response.status_code != 200:
        logger.warning("Unexpected HTTP status %s for %s",
                       response.status_code, location)
        return

    soup = BeautifulSoup(response.text, "html.parser")
    search_result = soup.find("h2", class_="SearchResults-desktop")
    if search_result is None:
        return

    message = search_result.get_text(strip=True)
    if "Aucun logement" in message:
        # The site's "no housing found" heading: nothing to report.
        return

    if get_inidividual_appartment_offers_count(soup) > 0:
        log_content(location, message)
        send_email(location, message, url, os.getenv("RECEIVER_EMAIL"))


def get_inidividual_appartment_offers_count(soup):
    """Count the listing cards in ``soup`` whose title is not excluded.

    Every ``<li>`` carrying the result-card class string is inspected. Cards
    titled exactly "TOLBIAC" are skipped; every other card is logged through
    ``log_content`` and counted.

    The colocation/individual distinction is currently not applied, so the
    card's detail paragraph is not consulted and a card without one is still
    counted.

    Args:
        soup: Parsed search-results page (anything offering ``find_all``).

    Returns:
        int: Number of cards that were not excluded.
    """
    card_classes = "fr-col-12 fr-col-sm-6 fr-col-md-4 svelte-11sc5my fr-col-lg-4"
    excluded_titles = {"TOLBIAC"}

    individual_appartments_counter = 0
    for card in soup.find_all("li", class_=card_classes):
        title_tag = card.find("h3", class_="fr-card__title")
        if title_tag is None:
            # A card without a title used to raise AttributeError. Count it
            # under a placeholder so an unexpected markup variation does not
            # hide an offer.
            apartment_title = "(untitled)"
        else:
            apartment_title = title_tag.get_text(strip=True)

        if apartment_title in excluded_titles:
            continue

        log_content("Individual Appartment found", apartment_title)
        individual_appartments_counter += 1

    return individual_appartments_counter


def log_content(location, message):
    """Write one event to the module logger and echo it on stdout.

    The entry is built here with its own ``YYYY-MM-DD HH:MM:SS`` timestamp
    and a trailing newline, then passed unchanged to ``logger.info`` and
    ``print``.

    Args:
        location: Label placed before the colon in the entry.
        message: Text placed after the colon.
    """
    now = time.strftime("%Y-%m-%d %H:%M:%S")
    entry = f"{now} - {location}: {message}\n"

    # Same text to both sinks, log file first.
    for emit in (logger.info, print):
        emit(entry)


def send_email(location, message, url, receiver_email):
    """Send a plain-text "new offers" notification through Gmail SMTP.

    The sender address and password are read from the ``SENDER_EMAIL`` and
    ``SENDER_PASSWORD`` environment variables.

    Args:
        location: Label of the search that produced the offers.
        message: Result heading text to include in the body.
        url: Search URL to include in the body.
        receiver_email: Address the notification is sent to.

    Returns:
        None. Failures are reported on stdout (with a traceback for SMTP
        errors) instead of being raised, so the caller is never interrupted.
    """
    sender_email = os.getenv("SENDER_EMAIL")
    sender_password = os.getenv("SENDER_PASSWORD")

    if not (sender_email and sender_password and receiver_email):
        # Fail early with a readable message instead of an opaque error from
        # inside smtplib after the connection has already been opened.
        print("Error sending email: SENDER_EMAIL, SENDER_PASSWORD and a "
              "receiver address must all be set")
        return

    subject = "New Apartment Offers"
    # Adjacent literals rather than a backslash continuation inside the
    # string, which used to embed the source indentation in the message.
    body = (
        f"There are new apartment offers in {location}: {message}\n"
        f"url : {url}"
    )

    msg = MIMEMultipart()
    msg["From"] = sender_email
    msg["To"] = receiver_email
    msg["Subject"] = subject

    msg.attach(MIMEText(body, "plain"))

    try:
        # The timeout keeps an unreachable mail server from blocking forever.
        with smtplib.SMTP("smtp.gmail.com", 587, timeout=30) as server:
            server.starttls()
            server.login(sender_email, sender_password)
            text = msg.as_string()
            server.sendmail(sender_email, receiver_email, text)
            print("*"*50)
            print(f"Email notification sent for {location}")
    except Exception as e:
        # Deliberately broad: a failed notification must not stop the caller.
        print(f"Error sending email: {e}")
        traceback.print_exc()

In [14]:
# All searches go to the same endpoint; only the `bounds` query value differs.
_SEARCH_URL_PREFIX = "https://trouverunlogement.lescrous.fr/tools/37/search?bounds="

url_1 = _SEARCH_URL_PREFIX + "2.224122_48.902156_2.4697602_48.8155755"
url_2 = _SEARCH_URL_PREFIX + "2.3456154_48.8420887_2.3481533_48.8407925"
url_3 = _SEARCH_URL_PREFIX + "2.3475418_48.8403664_2.349218_48.8392315"
url_4 = _SEARCH_URL_PREFIX + "2.338511_48.8460901_2.3398229_48.8446256"
url_5 = _SEARCH_URL_PREFIX + "2.3426248_48.8441866_2.3447498_48.8435835"
url_6 = _SEARCH_URL_PREFIX + "2.2723279_48.8710747_2.2745213_48.8691568"
url_7 = _SEARCH_URL_PREFIX + "2.3290562_48.849943_2.3309234_48.8479074"
url_8 = _SEARCH_URL_PREFIX + "2.3453689_48.8276182_2.3469872_48.8259151"
url_9 = _SEARCH_URL_PREFIX + "2.4130316_48.6485333_2.4705092_48.6109217"

# Label -> search URL, in the order the locations are to be visited.
locations = dict([
    ("Paris", url_1),
    ("ESPCI Paris, Paris", url_2),
    ("AgroParisTech, Paris", url_3),
    ("MINES ParisTech, Paris", url_4),
    ("Chimie ParisTech, Paris", url_5),
    ("Université Paris-Dauphine, Paris", url_6),
    ("Institut Catholique de Paris, Paris", url_7),
    ("Télécom ParisTech, Paris", url_8),
    ("Evry", url_9),
])

In [15]:
# Seconds to wait between two full passes over `locations`.
POLL_INTERVAL_SECONDS = 30

logger.info("Starting the scraper")
# Runs until the kernel is interrupted; KeyboardInterrupt is not an
# `Exception`, so interrupting the cell still stops the loop.
while True:
    for location, url in locations.items():
        try:
            check_for_new_offers(location, url)
        except Exception:
            # A failure on one location (network error, unexpected page
            # markup, ...) must not end the monitoring of all the others.
            # Record the traceback and move on.
            logger.exception("Check failed for %s", location)

    time.sleep(POLL_INTERVAL_SECONDS)

KeyboardInterrupt: 