In [17]:
#Loading libraries 
import os                  # For creating directories and handling file paths
import re                  # For extracting year using regular expressions
import requests            # For sending HTTP requests to download the CSV
from bs4 import BeautifulSoup  # For parsing HTML to scrape links
from io import BytesIO
import zipfile  # Needed to work with ZIP files
from urllib.parse import urljoin


In [1]:
# Step 1: User input
year = input("Enter the year of the Stack Overflow survey you want to download (e.g., 2024): ").strip()
base_url = "https://survey.stackoverflow.co/"
headers = {"User-Agent": "Mozilla/5.0"}

Enter the year of the Stack Overflow survey you want to download (e.g., 2024):  2024


In [4]:
# Step 2: Scrape main page for ZIP file
try:
    res = requests.get(base_url, headers=headers)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")
except requests.exceptions.RequestException as e:
    print(f" Could not load the base page: {e}")
    exit()


In [18]:
# Step 3: Find the ZIP file link
zip_url = None
for link in soup.find_all("a", href=True):
    href = link["href"]
    print(href)
    if href.endswith(".zip") and year in href:
        zip_url = href if href.startswith("http") else urljoin(base_url, href)
        print(f" Found ZIP file: {zip_url}")
        break

if not zip_url:
    print(f"No ZIP file found for year {year}.")
    exit()


/
https://trends.stackoverflow.co/
https://survey.stackoverflow.co/
https://survey.stackoverflow.co/2024
./datasets/stack-overflow-developer-survey-2024.zip
 Found ZIP file: https://survey.stackoverflow.co/datasets/stack-overflow-developer-survey-2024.zip


In [19]:
# Step 4: Download ZIP
try:
    zip_response = requests.get(zip_url)
    zip_response.raise_for_status()
except requests.exceptions.RequestException as e:
    print(f" Error downloading ZIP: {e}")
    exit()

In [20]:
# Step 5: Prepare folders
raw_folder = r"C:\Users\kanmani\Desktop\AutoStack360\Data\Raw"
meta_folder = r"C:\Users\kanmani\Desktop\AutoStack360\Data\Metadata"
os.makedirs(raw_folder, exist_ok=True)
os.makedirs(meta_folder, exist_ok=True)

In [21]:
# Step 6: Extract and organize
try:
    with zipfile.ZipFile(BytesIO(zip_response.content)) as z:
        print("Files in ZIP:", z.namelist())
        for file_name in z.namelist():
            if "survey_results_public.csv" in file_name:
                save_path = os.path.join(raw_folder, f"{year}_survey_results_public.csv")
            else:
                save_path = os.path.join(meta_folder, f"{year}_" + os.path.basename(file_name))

            with z.open(file_name) as src, open(save_path, "wb") as dest:
                dest.write(src.read())
            print(f"Extracted: {save_path}")

except Exception as e:
    print(f"Error extracting ZIP: {e}")

Files in ZIP: ['2024 Developer Survey.pdf', 'survey_results_public.csv', 'survey_results_schema.csv']
Extracted: C:\Users\kanmani\Desktop\AutoStack360\Data\Metadata\2024_2024 Developer Survey.pdf
Extracted: C:\Users\kanmani\Desktop\AutoStack360\Data\Raw\2024_survey_results_public.csv
Extracted: C:\Users\kanmani\Desktop\AutoStack360\Data\Metadata\2024_survey_results_schema.csv
