In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape admission results for one upper-secondary program across a set of
# municipalities and years, and collect them into the DataFrame `df`.

# Define parameters
years = range(2020, 2025)
kommuner = [
    "Botkyrka", "Danderyd", "Haninge", "Huddinge", "Järfälla", "Lidingö", "Nacka", "Sollentuna", "Solna",
    "Stockholm", "Sundbyberg", "Södertälje", "Tyresö", "Täby", "Upplands Väsby", "Vallentuna", "Vaxholm", "Värmdö"
]
program_keyword = "Naturvetenskapsprogrammet"

# Base URL (update this based on the actual structure of the website)
base_url = "https://gymnasieantagningen.storsthlm.se/slutantagning/"

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would block the whole run indefinitely.
request_timeout = 30

# Placeholder stored in a row when the page lacks the corresponding element.
missing_value = "N/A"

# Initialize DataFrame
columns = ["Year", "Kommun", "School", "Program", "Specialization", "Antagningsgräns", "Median"]
data = []


def _text_or_missing(node):
    """Return the stripped text of *node*, or `missing_value` if it is None."""
    return node.get_text(strip=True) if node is not None else missing_value


def _parse_rows(html, year, kommun):
    """Extract one row per matching program from a result page.

    Args:
        html: Raw page markup as returned by the server.
        year: Year the page was requested for; copied into every row.
        kommun: Municipality the page was requested for; copied into every row.

    Returns:
        A list of rows ordered like `columns`. Only programs whose heading
        contains `program_keyword` are included. Absent elements are recorded
        as `missing_value` instead of raising.
    """
    # NOTE(review): the tag names and CSS classes below are assumptions about
    # the page layout -- confirm against the real markup.
    soup = BeautifulSoup(html, "html.parser")
    rows = []

    for school in soup.find_all("div", class_="school-info"):
        school_name = _text_or_missing(school.find("h3"))

        for program in school.find_all("div", class_="program-info"):
            heading = program.find("h4")
            if heading is None:
                # Without a heading there is nothing to match the keyword against.
                continue

            program_name = heading.get_text(strip=True)
            if program_keyword not in program_name:
                continue

            rows.append([
                year,
                kommun,
                school_name,
                program_name,
                _text_or_missing(program.find("p", class_="specialization")),
                _text_or_missing(program.find("span", class_="antagningsgrans")),
                _text_or_missing(program.find("span", class_="median")),
            ])

    return rows


# A single session reuses the underlying connection for all requests to the
# same host instead of opening a new one per (year, kommun) pair.
with requests.Session() as session:
    for year in years:
        for kommun in kommuner:
            # Build the URL for the specific year and kommun
            params = {"year": year, "kommun": kommun}

            try:
                response = session.get(base_url, params=params, timeout=request_timeout)
            except requests.RequestException as error:
                # One unreachable page must not discard the rows already collected.
                print(f"Failed to retrieve data for year {year} and kommun {kommun}: {error}")
                continue

            if response.status_code != 200:
                print(f"Failed to retrieve data for year {year} and kommun {kommun}")
                continue

            data.extend(_parse_rows(response.content, year, kommun))

# Create DataFrame
df = pd.DataFrame(data, columns=columns)

print(df.head())
