<a href="https://colab.research.google.com/github/Chaymae90/beyond-the-map-data-tools/blob/main/ToolsTestBeyond.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# --- Step 0: Install required packages ---
!pip install requests beautifulsoup4 pandas ipywidgets

import requests
from bs4 import BeautifulSoup
import pandas as pd
from IPython.display import display
import ipywidgets as widgets

# --- Step 1: Scrape Explore Agadir Souss Massa ---
def scrape_explore_agadir(timeout=15):
    """Collect link records from the Explore Agadir Souss Massa English home page.

    Every anchor whose ``href`` contains ``/en/`` and whose visible text is
    longer than three characters becomes one record.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of dicts with the keys ``source``, ``name``, ``url``,
        ``description`` and ``category``. ``description`` and ``category``
        are always empty strings; ``url`` is absolute.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urljoin

    url = "https://explore-agadirsoussmassa.com/en/"
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    r = requests.get(url, timeout=timeout)
    # Fail loudly instead of harvesting links from an HTTP error page.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    attractions = []

    # The selector already guarantees a non-empty href containing "/en/",
    # so only the link text needs checking here.
    for link in soup.select('a[href*="/en/"]'):
        title = link.get_text(strip=True)
        if len(title) > 3:
            attractions.append({
                "source": "Explore Agadir Souss Massa",
                "name": title,
                # Resolve site-relative hrefs so the saved URL is usable as-is.
                "url": urljoin(url, link.get('href')),
                "description": "",
                "category": "",
            })
    return attractions

# --- Step 2: Scrape Visit Agadir ---
def scrape_visit_agadir(timeout=15):
    """Collect link records from the Visit Agadir home page.

    Every anchor with a non-empty ``href`` that does not contain
    ``visitagadir.com`` and whose visible text is longer than three
    characters becomes one record.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of dicts with the keys ``source``, ``name``, ``url``,
        ``description`` and ``category``. ``description`` and ``category``
        are always empty strings; relative ``url`` values are resolved
        against the home page address.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urljoin

    url = "https://visitagadir.com/"
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    r = requests.get(url, timeout=timeout)
    # Fail loudly instead of harvesting links from an HTTP error page.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    attractions = []

    for item in soup.select('a[href]'):
        title = item.get_text(strip=True)
        href = item.get('href')
        # NOTE(review): this keeps relative and external links but drops
        # absolute links to visitagadir.com itself -- confirm that is the
        # intended filter. It is applied to the raw href, before resolution,
        # so the selection is unchanged.
        if title and href and "visitagadir.com" not in href and len(title) > 3:
            attractions.append({
                "source": "Visit Agadir",
                "name": title,
                # Resolve relative hrefs; absolute ones pass through unchanged.
                "url": urljoin(url, href),
                "description": "",
                "category": "",
            })
    return attractions

# --- Step 3: Merge & Remove Duplicates ---
explore_data = scrape_explore_agadir()
visit_data = scrape_visit_agadir()

# Naming the columns up front keeps the frame well-formed even when both
# scrapers return nothing; a column-less frame would make
# drop_duplicates(subset=["name"]) raise KeyError.
# Duplicates are decided by name only and the first occurrence wins, so an
# Explore Agadir record takes precedence over a same-named Visit Agadir one.
df = pd.DataFrame(
    explore_data + visit_data,
    columns=["source", "name", "url", "description", "category"],
).drop_duplicates(subset=["name"])
display(df.head())

# --- Step 4: Filtering UI ---
def filter_data(keyword="", category="", min_name_len=0):
    """Display the rows of the module-level ``df`` that match the given filters.

    Each filter is skipped when left at its default, and the active ones are
    combined with AND.

    Args:
        keyword: Case-insensitive substring to look for in ``name``.
        category: Case-insensitive substring to look for in ``category``.
        min_name_len: Minimum number of characters required in ``name``.

    Returns:
        None. The filtered frame is rendered with ``display``.
    """
    filtered = df.copy()
    # regex=False: the text comes straight from a free-form input box, so it
    # must match literally. With the regex default, typing "(" or "[" raises
    # re.error and characters such as "." or "*" match unintended rows.
    if keyword:
        filtered = filtered[
            filtered['name'].str.contains(keyword, case=False, na=False, regex=False)
        ]
    if category:
        filtered = filtered[
            filtered['category'].str.contains(category, case=False, na=False, regex=False)
        ]
    if min_name_len > 0:
        filtered = filtered[filtered['name'].str.len() >= min_name_len]
    display(filtered)

# Input controls that feed filter_data; one widget per keyword argument.
keyword_box = widgets.Text(description="Keyword:")
category_box = widgets.Text(description="Category:")
min_len_box = widgets.IntSlider(description="Min name length", min=0, max=50, value=0)

# A notebook only auto-renders the last expression of a cell. More statements
# follow this one, so the interactive control must be displayed explicitly or
# it never appears.
display(
    widgets.interactive(
        filter_data,
        keyword=keyword_box,
        category=category_box,
        min_name_len=min_len_box,
    )
)

# --- Step 5: Save to CSV ---
# Writes the full de-duplicated frame, not the filtered view shown above.
df.to_csv("agadir_attractions.csv", index=False)
print("Saved agadir_attractions.csv")
