## Importation des bibliothèques

In [1]:
# librairie gestion des données

import pandas as pd

# librairie scraping

import requests
from bs4 import BeautifulSoup
import re
from requests.auth import HTTPBasicAuth

# librairie envoi du mail

from mailtrap import Mail, Address, MailtrapClient

## Définition des fonctions de scraping

In [2]:
def extract_bar_links(url, timeout=10):
    """
    Extracts links of bars from a given URL.

    Args:
    url (str): URL of the page containing bar links.
    timeout (float): Seconds to wait for the server before giving up.

    Returns:
    list: The ``href`` value of every ``<a class="name">`` anchor found on
    the page, in document order.

    Raises:
    requests.RequestException: If the request fails, times out, or the
    server answers with an HTTP error status.
    """
    # requests applies no timeout by default; without one a stalled server
    # blocks the whole run indefinitely.
    response_page = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an empty list.
    response_page.raise_for_status()
    soup_page = BeautifulSoup(response_page.content, 'html.parser')
    # href=True skips anchors that carry no href attribute, which would
    # otherwise raise KeyError on a['href'].
    return [a['href'] for a in soup_page.find_all('a', class_='name', href=True)]

def extract_bar_info(href_bar, timeout=10):
    """
    Extracts information of a bar from its link.

    Args:
    href_bar (str): URL of the bar.
    timeout (float): Seconds to wait for the server before giving up.

    Returns:
    dict: Dictionary with the keys 'Nom', 'Heures_Ouverture', 'Services' and
    'Tarifs'. 'Services' is the list of service names whose availability cell
    reads 'Oui'. The three text fields are None when the corresponding
    element is not present on the page.

    Raises:
    requests.RequestException: If the request fails, times out, or the
    server answers with an HTTP error status.
    """
    response = requests.get(href_bar, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    def text_of(tag):
        # find() returns None when nothing matches; calling .text on it would
        # raise AttributeError and abort the whole scrape for one odd page.
        return tag.text.strip() if tag is not None else None

    services = []
    table_services = soup.find('table', class_='table-services')
    if table_services:
        for row in table_services.find_all('tr'):
            service = text_of(row.find('td', class_='text-truncate'))
            disponibilite = text_of(row.find('td', class_='float-right'))
            # Rows lacking either cell are skipped rather than crashing.
            if service is not None and disponibilite == 'Oui':
                services.append(service)

    return {
        'Nom': text_of(soup.find('h1', class_='mb-0 d-inline-block')),
        'Heures_Ouverture': text_of(soup.find('div', class_="col-lg-6 mb-3 mb-md-0")),
        'Services': services,
        'Tarifs': text_of(soup.find('div', class_="position-relative")),
    }

def extract_prices_and_links(urls_pages, timeout=10):
    """
    Extracts the names of bars and their happy hour prices from listing pages.

    Despite the function name, no links are returned: the first list holds
    the bar names read from the ``<a class="name">`` anchors.

    Args:
    urls_pages (list): List of URLs of pages containing bar links.
    timeout (float): Seconds to wait for each page before giving up.

    Returns:
    tuple: ``(noms_bars, prix_happy_hours)``. ``noms_bars`` is a list of str;
    ``prix_happy_hours`` is a list of float, with None for every price
    paragraph in which no price could be found.

    Raises:
    requests.RequestException: If a request fails, times out, or the server
    answers with an HTTP error status.
    """
    noms_bars = []
    prix_happy_hours = []
    # One or more digits before the decimal comma: the previous single-digit
    # pattern turned "10,50" into 0.50.
    motif_prix = re.compile(r'\d+,\d{2}')

    for url_page in urls_pages:
        response_page = requests.get(url_page, timeout=timeout)
        response_page.raise_for_status()
        soup = BeautifulSoup(response_page.content, 'html.parser')

        noms_bars.extend(
            balise.text.strip() for balise in soup.find_all('a', class_="name")
        )

        for item in soup.find_all('p', class_="text-truncate mb-2 text-secondary"):
            motif = motif_prix.search(item.text)
            # French decimal comma -> dot so float() can parse it.
            prix_happy_hours.append(
                float(motif.group(0).replace(',', '.')) if motif else None
            )

    return noms_bars, prix_happy_hours

def create_dataframe(all_bars_info, noms_bars, all_links, prix_happy_hours=None):
    """
    Creates a DataFrame from the extracted information of bars.

    Args:
    all_bars_info (list): List containing dictionaries of bar information.
    noms_bars (list): List of bar names. Currently unused; kept so existing
    callers keep working.
    all_links (list): List of bar links, one per entry of all_bars_info.
    prix_happy_hours (list): Happy hour prices, one per entry of
    all_bars_info. When omitted, the module-level variable of the same name
    is used, which is what this function silently relied on before.

    Returns:
    pandas.DataFrame: One row per bar, with the columns of all_bars_info plus
    'Prix Happy Hour' and 'Liens'.

    Raises:
    ValueError: If no prices are passed and no module-level
    ``prix_happy_hours`` exists, or (raised by pandas) if a list length does
    not match the number of bars.
    """
    if prix_happy_hours is None:
        # Backward compatibility with callers that set the global beforehand.
        prix_happy_hours = globals().get('prix_happy_hours')
    if prix_happy_hours is None:
        raise ValueError(
            "prix_happy_hours must be passed to create_dataframe() "
            "or defined at module level"
        )

    df = pd.DataFrame(all_bars_info)
    df['Prix Happy Hour'] = prix_happy_hours
    df["Liens"] = all_links

    return df

def export_csv(df, file_path):
    """
    Writes a DataFrame to a CSV file, without the index column.

    A confirmation is printed on success. Any exception raised while writing
    is caught and reported on standard output instead of being propagated.

    Args:
    df (pd.DataFrame): The DataFrame to export.
    file_path (str): Path where the CSV file must be saved.

    Returns:
    None
    """
    try:
        df.to_csv(file_path, index=False)
        message = f"Le fichier CSV a été enregistré avec succès à l'emplacement : {file_path}"
        print(message)
    except Exception as erreur:
        message = f"Une erreur s'est produite lors de l'exportation du fichier CSV : {erreur}"
        print(message)

## Définition de la fonction d'analyse

In [3]:
def clean_and_sort_dataframe(df, top_n=5):
    """
    Cleans and sorts the DataFrame by happy hour prices.

    Rows with a missing value in any column are dropped, the rest is sorted
    by ascending 'Prix Happy Hour', and only the first top_n rows are kept.
    The input DataFrame is not modified.

    Args:
    df (pandas.DataFrame): DataFrame containing bar information; must have a
    'Prix Happy Hour' column.
    top_n (int): Number of cheapest bars to keep. Defaults to 5.

    Returns:
    pandas.DataFrame: Cleaned and sorted DataFrame with a fresh 0-based index.
    """
    return (
        df.dropna()
        # mergesort is stable: bars with equal prices keep their original
        # order, so the selection at the cut-off is deterministic.
        .sort_values(by='Prix Happy Hour', kind='mergesort')
        .head(top_n)
        .reset_index(drop=True)
    )

## Définition des fonctions d'e-mail

In [4]:
def generate_html_content(top_bars):
    """
    Generates the HTML content for the email based on the top bars.

    Args:
    top_bars (list): List of dictionaries, each with the keys 'Nom',
    'Adresse', 'Prix Happy Hour', 'Prix Normal' and 'Économie'.

    Returns:
    str: A complete HTML document containing one table row per bar, after a
    header row. Every value is HTML-escaped before insertion.

    Raises:
    KeyError: If a dictionary lacks one of the expected keys.
    """
    # Local import so the block does not depend on the file's import cell.
    from html import escape

    def cell(value):
        # Values may come from scraped pages: escape &, < and > so they cannot
        # inject markup. Quotes are left alone, which is safe in element text.
        return escape(str(value), quote=False)

    header = """
    <!DOCTYPE html>
    <html lang="fr">
    <head>
        <meta charset="UTF-8">
        <title>Rapport des Happy Hours à Toulouse</title>
        <style>
            body { font-family: Arial, sans-serif; }
            table { width: 100%; border-collapse: collapse; }
            th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
            th { background-color: #f2f2f2; }
        </style>
    </head>
    <body>
        <h1>Rapport des Happy Hours à Toulouse</h1>
        <h2>Top 5 des Meilleurs Deals</h2>
        <table>
            <tr>
                <th>Nom du Bar</th>
                <th>Adresse</th>
                <th>Prix Happy Hour</th>
                <th>Prix Normal</th>
                <th>Économie</th>
            </tr>
    """

    rows = [
        f"""
            <tr>
                <td>{cell(bar['Nom'])}</td>
                <td>{cell(bar['Adresse'])}</td>
                <td>{cell(bar['Prix Happy Hour'])} €</td>
                <td>{cell(bar['Prix Normal'])} €</td>
                <td>{cell(bar['Économie'])} €</td>
            </tr>
        """
        for bar in top_bars
    ]

    footer = """
        </table>
    </body>
    </html>
    """

    # Single join instead of repeated += on a growing string.
    return header + "".join(rows) + footer

def send_email(html_content, token=None):
    """
    Sends an HTML email with the provided content.

    Args:
    html_content (str): HTML content of the email.
    token (str): Mailtrap API token. When omitted, the MAILTRAP_API_TOKEN
    environment variable is used, so the secret never lives in the source.

    Returns:
    None

    Raises:
    ValueError: If no token is given and MAILTRAP_API_TOKEN is not set.
    """
    # Local import so the block does not depend on the file's import cell.
    import os

    token = token or os.environ.get("MAILTRAP_API_TOKEN")
    if not token:
        raise ValueError(
            "No Mailtrap API token: pass token=... or set MAILTRAP_API_TOKEN"
        )

    mail = Mail(
        sender=Address(email="quentin.goumeziane.edu@groupe-gema.com", name="Mailtrap Test"),
        to=[Address(email="quentin.goumeziane@gmail.com", name="Recipient Name")],
        subject="Top 5 bars Toulouse Happy Hour",
        text="This is a fallback text for email clients that don't render HTML",
        html=html_content,
        category="HTML Email",
        headers={"X-Custom-Header": "Value"}
    )

    # MailtrapClient authenticates with a single API token; a second
    # positional argument would be taken as the API host, not a password.
    client = MailtrapClient(token=token)
    client.send(mail)

## PARTIE 1

In [None]:
# Imported here so this cell stays self-contained.
from urllib.parse import urljoin

# List of URLs of pages containing bar links
urls_pages = [
    "https://www.schlouk-map.com/fr/cities/toulouse/happy-hour",
    "https://www.schlouk-map.com/fr/cities/toulouse/happy-hour?page=2"
]

# Extracting bar links from every listing page, in page order
all_links = []
for url_page in urls_pages:
    bar_links = extract_bar_links(url_page)
    all_links.extend(bar_links)

# Extracting information of each bar
all_bars_info = []
for link in all_links:
    # urljoin copes with site-relative hrefs (with or without a leading
    # slash) and with absolute hrefs, where plain concatenation would
    # produce a broken URL.
    full_url = urljoin("https://www.schlouk-map.com", link)
    bar_info = extract_bar_info(full_url)
    all_bars_info.append(bar_info)

# Extracting bar names and prices of happy hours
noms_bars, prix_happy_hours = extract_prices_and_links(urls_pages)

# Creating DataFrame (one row per scraped bar)
df = create_dataframe(all_bars_info, noms_bars, all_links)

export_csv(df, 'data.csv')

# Bare expression: displays the DataFrame as the notebook cell output
df

## PARTIE 2

In [None]:
# Cleaning and sorting DataFrame: drops incomplete rows and keeps the five
# lowest happy hour prices
df_lim = clean_and_sort_dataframe(df)

# Saved separately from data.csv, which holds the full scrape
export_csv(df_lim, 'top5.csv')

# Bare expression: displays the result as the notebook cell output
df_lim

In [None]:
# Summary statistics of the happy hour prices over the full, uncleaned DataFrame
df['Prix Happy Hour'].describe()

## PARTIE 3

In [None]:
# Top 5 bars data
# NOTE(review): this list is hard-coded and is not derived from df_lim, which
# has no 'Adresse', 'Prix Normal' or 'Économie' columns; the email therefore
# does not reflect a fresh scrape. Confirm whether that is intended.
# NOTE(review): the first row is inconsistent: 3.5 - 2.6 = 0.9, but 'Économie'
# says 1.1. One of the three figures is presumably a typo; verify.
top_bars = [
    {'Nom': 'Chez Tonton', 'Adresse': '16 Place Saint-Pierre, 31000 Toulouse', 'Prix Happy Hour': 2.6, 'Prix Normal': 3.5, 'Économie': 1.1},
    {'Nom': '1862 Artybar', 'Adresse': '24 Rue Nicolas Bachelier, 31000 Toulouse', 'Prix Happy Hour': 3.0, 'Prix Normal': 6.0, 'Économie': 3.0},
    {'Nom': 'Le Petit Voisin', 'Adresse': '37 Rue Peyrolières, 31000 Toulouse', 'Prix Happy Hour': 3.0, 'Prix Normal': 4.0, 'Économie': 1.0},
    {'Nom': 'MAD (Meet And Drink)', 'Adresse': '4 Allées Charles de Fitte, 31300 Toulouse', 'Prix Happy Hour': 3.0, 'Prix Normal': 5.0, 'Économie': 2.0},
    {'Nom': 'Matabiau Décapsule Club', 'Adresse': '74 Rue Matabiau, 31000 Toulouse', 'Prix Happy Hour': 3.7, 'Prix Normal': 4.9, 'Économie': 1.2}
]

# Generate HTML content
html_content = generate_html_content(top_bars)

# Send email with HTML content
send_email(html_content)