In [None]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import json

In [3]:
df_PublicInterestAI = pd.read_csv("PublicInterestAI_Projekte.csv", sep=";")
df_PublicInterestAI.head()

Unnamed: 0,Quelle,Projektname,Art,Einsatzbereich,Webseite-Link,Organisation,Ansprechperson,Email,Status,Kurzzusammenfassung,Unnamed: 10


In [None]:
def scrape_all_projects():
    """
    Scrape all projects by finding the specific button classes
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    
    driver = webdriver.Chrome(options=chrome_options)
    wait = WebDriverWait(driver, 15)
    
    try:
        print("Loading page...")
        driver.get("https://publicinterest.ai/tool/map/directory")
        time.sleep(5)
        
        all_projects = []
        page_num = 1
        
        while True:
            print(f"Scraping page {page_num}...")
            
            # Wait for content to load
            try:
                wait.until(EC.presence_of_element_located((By.TAG_NAME, "button")))
            except:
                print("No buttons found")
                break
            
            # Get page source
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            
            # Find the specific project buttons
            project_buttons = soup.find_all('button', class_='sc-1b0661b3-0 sc-ef00fa6-0 cMVLjt bGXFPi')
            
            if not project_buttons:
                # Try more flexible approach
                project_buttons = soup.find_all('button', class_=lambda x: x and 'sc-1b0661b3-0' in x and 'sc-ef00fa6-0' in x)
            
            if not project_buttons:
                # Even more flexible - just look for buttons with those class patterns
                project_buttons = soup.find_all('button', class_=lambda x: x and 'cMVLjt' in x and 'bGXFPi' in x)
            
            print(f"Found {len(project_buttons)} project buttons on page {page_num}")
            
            if not project_buttons:
                print("No project buttons found, stopping")
                break
            
            # Extract project data from each button
            for button in project_buttons:
                project_data = extract_project_data(button)
                if project_data:
                    all_projects.append(project_data)
            
            # Try to find next page button
            try:
                # Look for pagination or load more buttons
                next_selectors = [
                    'button[aria-label*="next"]',
                    'button[aria-label*="Next"]', 
                    'button:contains("Load more")',
                    'button:contains("Show more")',
                    '.pagination button:last-child'
                ]
                
                next_clicked = False
                for selector in next_selectors:
                    try:
                        next_buttons = driver.find_elements(By.CSS_SELECTOR, selector)
                        for btn in next_buttons:
                            if btn.is_enabled() and btn.is_displayed():
                                driver.execute_script("arguments[0].click();", btn)
                                time.sleep(3)
                                next_clicked = True
                                break
                        if next_clicked:
                            break
                    except:
                        continue
                
                if not next_clicked:
                    print("No more pages found")
                    break
                    
                page_num += 1
                
            except Exception as e:
                print(f"Pagination error: {e}")
                break
        
        return all_projects
        
    finally:
        driver.quit()

def extract_project_data(button):
    """
    Extract data from project button, focusing on sc-4576c65c-1 colBIV divs
    """
    project = {}
    
    try:
        # Look for the specific div class you mentioned
        target_divs = button.find_all('div', class_='sc-4576c65c-1 colBIV')
        
        if not target_divs:
            # Try more flexible approach
            target_divs = button.find_all('div', class_=lambda x: x and 'sc-4576c65c-1' in x)
        
        if not target_divs:
            # Even more flexible
            target_divs = button.find_all('div', class_=lambda x: x and 'colBIV' in x)
        
        # Extract the complete HTML soup for each target div
        project['target_divs_soup'] = []
        project['target_divs_html'] = []
        
        for i, div in enumerate(target_divs):
            # Store the BeautifulSoup object (as string representation)
            project['target_divs_soup'].append(str(div))
            
            # Store the HTML content
            project['target_divs_html'].append(div.prettify())
            
            # Also extract structured data from each div
            div_data = {
                'index': i,
                'classes': div.get('class', []),
                'id': div.get('id', ''),
                'text_content': div.get_text(strip=True),
                'attributes': dict(div.attrs),
                'child_elements': []
            }
            
            # Get information about child elements
            for child in div.find_all(recursive=False):  # Direct children only
                child_info = {
                    'tag': child.name,
                    'classes': child.get('class', []),
                    'text': child.get_text(strip=True),
                    'attributes': dict(child.attrs)
                }
                div_data['child_elements'].append(child_info)
            
            project.setdefault('div_details', []).append(div_data)
        
        # Extract title (usually the first or largest text)
        title = ""
        for div in target_divs:
            text = div.get_text(strip=True)
            if text and len(text) > len(title):
                title = text
        
        if not title:
            # Fallback - get any prominent text from the button
            all_text = button.get_text(strip=True).split('\n')
            for text in all_text:
                if text and len(text) > 5 and len(text) < 200:
                    title = text
                    break
        
        project['title'] = title or "No title found"
        
        # Get text content from target divs
        div_contents = []
        for div in target_divs:
            content = div.get_text(strip=True)
            if content:
                div_contents.append(content)
        
        project['div_text_contents'] = div_contents
        
        # Extract other useful data
        project['button_soup'] = str(button)  # Complete button HTML
        project['button_classes'] = ' '.join(button.get('class', []))
        
        # Get all links within the target divs
        div_links = []
        for div in target_divs:
            for a in div.find_all('a'):
                href = a.get('href')
                if href:
                    if href.startswith('/'):
                        href = 'https://publicinterest.ai' + href
                    div_links.append({
                        'url': href,
                        'text': a.get_text(strip=True),
                        'attributes': dict(a.attrs)
                    })
        project['div_links'] = div_links
        
        # Extract any images within target divs
        div_images = []
        for div in target_divs:
            for img in div.find_all('img'):
                src = img.get('src', '')
                if src.startswith('/'):
                    src = 'https://publicinterest.ai' + src
                div_images.append({
                    'src': src,
                    'alt': img.get('alt', ''),
                    'attributes': dict(img.attrs)
                })
        project['div_images'] = div_images
        
        # Get complete text for filtering/searching later
        project['full_text'] = button.get_text(separator=' | ', strip=True)
        
        return project
        
    except Exception as e:
        print(f"Error extracting project: {e}")
        return None

def main():
    print("Scraping all projects...")
    projects = scrape_all_projects()
    
    print(f"\nTotal projects scraped: {len(projects)}")
    
    if projects:
        # Save to JSON
        with open('all_projects.json', 'w', encoding='utf-8') as f:
            json.dump(projects, f, indent=2, ensure_ascii=False)
        
        print("\nSample projects with soup data:")
        for i, project in enumerate(projects[:3]):  # Show fewer but with more detail
            print(f"\nProject {i+1}:")
            print(f"Title: {project.get('title', 'N/A')}")
            print(f"Number of target divs found: {len(project.get('target_divs_soup', []))}")
            
            # Show the HTML soup for each target div
            for j, div_soup in enumerate(project.get('target_divs_soup', [])):
                print(f"\nTarget Div {j+1} HTML:")
                print(div_soup)
                print(f"\nTarget Div {j+1} Prettified:")
                if j < len(project.get('target_divs_html', [])):
                    print(project['target_divs_html'][j])
            
            # Show structured div details
            if project.get('div_details'):
                print(f"\nDiv Details:")
                for detail in project['div_details']:
                    print(f"  Div {detail['index']}: classes={detail['classes']}, text='{detail['text_content'][:100]}...'")
                    print(f"  Child elements: {len(detail['child_elements'])}")
                    for child in detail['child_elements']:
                        print(f"    {child['tag']}: {child['text'][:50]}...")
            
            print("-" * 80)
        
        # Filter for Germany-related projects after scraping
        german_projects = []
        search_terms = ['germany', 'german', 'deutschland', 'berlin', 'munich', 'hamburg']
        
        for project in projects:
            full_text = project.get('full_text', '').lower()
            if any(term in full_text for term in search_terms):
                german_projects.append(project)
        
        print(f"\nFound {len(german_projects)} Germany-related projects")
        
        if german_projects:
            with open('german_projects.json', 'w', encoding='utf-8') as f:
                json.dump(german_projects, f, indent=2, ensure_ascii=False)
    
    return projects

if __name__ == "__main__":
    projects = main()

Scraping all projects...
Loading page...
Scraping page 1...
Found 40 project buttons on page 1
No more pages found

Total projects scraped: 40

Sample projects with soup data:

Project 1:
Title: "KIVI"  KI + vigilareDüsseldorf, GermanyMedia Authority of NRW
Number of target divs found: 0
--------------------------------------------------------------------------------

Project 2:
Title: ADISRotterdam, NetherlandsThe Ocean Cleanup Projects B.V.
Number of target divs found: 0
--------------------------------------------------------------------------------

Project 3:
Title: AI4GridsKonstanz, GermanyHTWG-Konstanz
Number of target divs found: 0
--------------------------------------------------------------------------------

Found 22 Germany-related projects


In [43]:
soup = BeautifulSoup(projects[0]["button_soup"], 'html.parser')
soup

<button class="sc-1b0661b3-0 sc-ef00fa6-0 cMVLjt bGXFPi"><p>"KIVI"  KI + vigilare </p><ul class="sc-94b8f193-0 oMHBF"><li class="sc-83595954-1 MTNHn"><span class="undefined svg" style='display: block; width: 100%; height: 100%; background-position: center center; background-repeat: no-repeat; background-size: contain; background-image: url("data:image/svg+xml,%3Csvg%20xmlns%3D\"http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg\"%20width%3D\"11.398\"%20height%3D\"16\"%3E%3Cpath%20fill%3D\"%23fff\"%20d%3D\"M5.699%200a5.7%205.7%200%200%200-4.567%209.109l3.655%206.332a1%201%200%200%200%20.068.119l.008.014a1.022%201.022%200%200%200%201.613.063l.01.006.036-.063a1%201%200%200%200%20.135-.234l3.582-6.2A5.7%205.7%200%200%200%205.699%200m-.056%208.589a2.808%202.808%200%201%201%202.809-2.808%202.81%202.81%200%200%201-2.808%202.808Z\"%2F%3E%3C%2Fsvg%3E");'></span><span>Düsseldorf, Germany</span></li><li class="sc-83595954-1 MTNHn"><span class="undefined svg" style='display: block; width: 100%; height: 100%; ba

In [42]:
link = soup.find_all('a')
link

[]

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time
import json

def scrape_all_projects():
    """
    Scrape all projects by finding buttons, clicking them, and collecting detailed content
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    
    driver = webdriver.Chrome(options=chrome_options)
    wait = WebDriverWait(driver, 15)
    
    try:
        print("Loading page...")
        driver.get("https://publicinterest.ai/tool/map/directory")
        time.sleep(5)
        
        all_projects = []
        page_num = 1
        
        while True:
            print(f"Scraping page {page_num}...")
            
            # Wait for content to load
            try:
                wait.until(EC.presence_of_element_located((By.TAG_NAME, "button")))
            except:
                print("No buttons found")
                break
            
            # Find all project buttons using Selenium (for clicking)
            selenium_buttons = driver.find_elements(By.CSS_SELECTOR, 'button.sc-1b0661b3-0.sc-ef00fa6-0.cMVLjt.bGXFPi')
            
            if not selenium_buttons:
                # Try more flexible approach
                selenium_buttons = driver.find_elements(By.CSS_SELECTOR, 'button[class*="sc-1b0661b3-0"][class*="sc-ef00fa6-0"]')
            
            if not selenium_buttons:
                # Even more flexible
                selenium_buttons = driver.find_elements(By.CSS_SELECTOR, 'button[class*="cMVLjt"][class*="bGXFPi"]')
            
            print(f"Found {len(selenium_buttons)} clickable project buttons on page {page_num}")
            
            if not selenium_buttons:
                print("No project buttons found, stopping")
                break
            
            # Click each button and collect detailed data
            for i, button in enumerate(selenium_buttons):
                print(f"Processing button {i+1}/{len(selenium_buttons)}")
                
                try:
                    # Scroll to button and click
                    driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", button)
                    time.sleep(1)
                    
                    # Click the button
                    driver.execute_script("arguments[0].click();", button)
                    time.sleep(2)  # Wait for details to load
                    
                    # Get the page content after clicking
                    soup = BeautifulSoup(driver.page_source, 'html.parser')
                    
                    # Extract data from the clicked state
                    project_data = extract_clicked_project_data(soup, i)
                    
                    if project_data:
                        all_projects.append(project_data)
                    
                    # Close modal/details if there's a close button
                    try:
                        close_selectors = [
                            'button[aria-label*="close"]',
                            'button[aria-label*="Close"]',
                            '.modal-close',
                            '[data-testid*="close"]',
                            'button:contains("×")',
                            'button:contains("Close")'
                        ]
                        
                        for selector in close_selectors:
                            try:
                                close_btn = driver.find_element(By.CSS_SELECTOR, selector)
                                if close_btn.is_displayed():
                                    driver.execute_script("arguments[0].click();", close_btn)
                                    time.sleep(1)
                                    break
                            except:
                                continue
                    except:
                        pass
                    
                    # Press Escape key as fallback to close modal
                    try:
                        from selenium.webdriver.common.keys import Keys
                        driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.ESCAPE)
                        time.sleep(1)
                    except:
                        pass
                    
                except Exception as e:
                    print(f"Error processing button {i+1}: {e}")
                    continue
            
            # Try to find next page button
            try:
                next_selectors = [
                    'button[aria-label*="next"]',
                    'button[aria-label*="Next"]', 
                    'button:contains("Load more")',
                    'button:contains("Show more")',
                    '.pagination button:last-child'
                ]
                
                next_clicked = False
                for selector in next_selectors:
                    try:
                        next_buttons = driver.find_elements(By.CSS_SELECTOR, selector)
                        for btn in next_buttons:
                            if btn.is_enabled() and btn.is_displayed():
                                driver.execute_script("arguments[0].click();", btn)
                                time.sleep(3)
                                next_clicked = True
                                break
                        if next_clicked:
                            break
                    except:
                        continue
                
                if not next_clicked:
                    print("No more pages found")
                    break
                    
                page_num += 1
                
            except Exception as e:
                print(f"Pagination error: {e}")
                break
        
        return all_projects
        
    finally:
        driver.quit()

def extract_clicked_project_data(soup, button_index):
    """
    Extract data after clicking a project button, focusing on sc-4576c65c-0 ffKmeh divs
    """
    project = {
        'button_index': button_index,
        'timestamp': time.time()
    }
    
    try:
        # Look for the initial project button divs (sc-4576c65c-1 colBIV)
        initial_divs = soup.find_all('div', class_='sc-4576c65c-1 colBIV')
        if not initial_divs:
            initial_divs = soup.find_all('div', class_=lambda x: x and 'colBIV' in x)
        
        project['initial_divs_count'] = len(initial_divs)
        project['initial_divs_soup'] = [str(div) for div in initial_divs]
        
        # Look for the target divs that appear after clicking (sc-4576c65c-0 ffKmeh)
        target_divs = soup.find_all('div', class_='sc-4576c65c-0 ffKmeh')
        
        if not target_divs:
            # Try more flexible approach
            target_divs = soup.find_all('div', class_=lambda x: x and 'sc-4576c65c-0' in x)
        
        if not target_divs:
            # Even more flexible
            target_divs = soup.find_all('div', class_=lambda x: x and 'ffKmeh' in x)
        
        print(f"  Found {len(target_divs)} target divs (sc-4576c65c-0 ffKmeh)")
        
        # Extract complete soup and details for each target div
        project['target_divs_soup'] = []
        project['target_divs_html'] = []
        project['target_divs_details'] = []
        
        for i, div in enumerate(target_divs):
            # Store the complete HTML
            project['target_divs_soup'].append(str(div))
            project['target_divs_html'].append(div.prettify())
            
            # Extract structured details
            div_detail = {
                'index': i,
                'classes': div.get('class', []),
                'id': div.get('id', ''),
                'text_content': div.get_text(strip=True),
                'attributes': dict(div.attrs),
                'all_links': [],
                'all_images': [],
                'all_text_elements': []
            }
            
            # Extract all links within this div
            for a in div.find_all('a'):
                href = a.get('href', '')
                if href.startswith('/'):
                    href = 'https://publicinterest.ai' + href
                div_detail['all_links'].append({
                    'url': href,
                    'text': a.get_text(strip=True),
                    'attributes': dict(a.attrs)
                })
            
            # Extract all images
            for img in div.find_all('img'):
                src = img.get('src', '')
                if src.startswith('/'):
                    src = 'https://publicinterest.ai' + src
                div_detail['all_images'].append({
                    'src': src,
                    'alt': img.get('alt', ''),
                    'attributes': dict(img.attrs)
                })
            
            # Extract all text elements with their tags
            for element in div.find_all(text=True):
                parent = element.parent
                if parent and element.strip():
                    div_detail['all_text_elements'].append({
                        'tag': parent.name,
                        'text': element.strip(),
                        'parent_classes': parent.get('class', [])
                    })
            
            project['target_divs_details'].append(div_detail)
        
        # Extract title from various sources
        title = ""
        
        # Try from target divs first
        for div in target_divs:
            headings = div.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
            for h in headings:
                text = h.get_text(strip=True)
                if text and len(text) > len(title):
                    title = text
        
        # Fallback to initial divs
        if not title:
            for div in initial_divs:
                text = div.get_text(strip=True)
                if text and len(text) > len(title) and len(text) < 200:
                    title = text
        
        project['title'] = title or f"Project {button_index + 1}"
        
        # Get all visible text from target divs
        target_div_texts = []
        for div in target_divs:
            text = div.get_text(strip=True)
            if text:
                target_div_texts.append(text)
        
        project['target_div_texts'] = target_div_texts
        project['target_divs_count'] = len(target_divs)
        
        # Look for any modal or popup content
        modal_selectors = [
            '[role="dialog"]',
            '.modal',
            '[data-testid*="modal"]',
            '[aria-modal="true"]'
        ]
        
        modal_content = []
        for selector in modal_selectors:
            modals = soup.select(selector)
            for modal in modals:
                modal_content.append({
                    'selector': selector,
                    'html': str(modal),
                    'text': modal.get_text(strip=True)
                })
        
        project['modal_content'] = modal_content
        
        return project
        
    except Exception as e:
        print(f"Error extracting clicked project data: {e}")
        return None

def extract_project_data(button):
    """
    Extract data from project button, focusing on sc-4576c65c-1 colBIV divs
    """
    project = {}
    
    try:
        # Look for the specific div class you mentioned
        target_divs = button.find_all('div', class_='sc-4576c65c-1 colBIV')
        
        if not target_divs:
            # Try more flexible approach
            target_divs = button.find_all('div', class_=lambda x: x and 'sc-4576c65c-1' in x)
        
        if not target_divs:
            # Even more flexible
            target_divs = button.find_all('div', class_=lambda x: x and 'colBIV' in x)
        
        # Extract the complete HTML soup for each target div
        project['target_divs_soup'] = []
        project['target_divs_html'] = []
        
        for i, div in enumerate(target_divs):
            # Store the BeautifulSoup object (as string representation)
            project['target_divs_soup'].append(str(div))
            
            # Store the HTML content
            project['target_divs_html'].append(div.prettify())
            
            # Also extract structured data from each div
            div_data = {
                'index': i,
                'classes': div.get('class', []),
                'id': div.get('id', ''),
                'text_content': div.get_text(strip=True),
                'attributes': dict(div.attrs),
                'child_elements': []
            }
            
            # Get information about child elements
            for child in div.find_all(recursive=False):  # Direct children only
                child_info = {
                    'tag': child.name,
                    'classes': child.get('class', []),
                    'text': child.get_text(strip=True),
                    'attributes': dict(child.attrs)
                }
                div_data['child_elements'].append(child_info)
            
            project.setdefault('div_details', []).append(div_data)
        
        # Extract title (usually the first or largest text)
        title = ""
        for div in target_divs:
            text = div.get_text(strip=True)
            if text and len(text) > len(title):
                title = text
        
        if not title:
            # Fallback - get any prominent text from the button
            all_text = button.get_text(strip=True).split('\n')
            for text in all_text:
                if text and len(text) > 5 and len(text) < 200:
                    title = text
                    break
        
        project['title'] = title or "No title found"
        
        # Get text content from target divs
        div_contents = []
        for div in target_divs:
            content = div.get_text(strip=True)
            if content:
                div_contents.append(content)
        
        project['div_text_contents'] = div_contents
        
        # Extract other useful data
        project['button_soup'] = str(button)  # Complete button HTML
        project['button_classes'] = ' '.join(button.get('class', []))
        
        # Get all links within the target divs
        div_links = []
        for div in target_divs:
            for a in div.find_all('a'):
                href = a.get('href')
                if href:
                    if href.startswith('/'):
                        href = 'https://publicinterest.ai' + href
                    div_links.append({
                        'url': href,
                        'text': a.get_text(strip=True),
                        'attributes': dict(a.attrs)
                    })
        project['div_links'] = div_links
        
        # Extract any images within target divs
        div_images = []
        for div in target_divs:
            for img in div.find_all('img'):
                src = img.get('src', '')
                if src.startswith('/'):
                    src = 'https://publicinterest.ai' + src
                div_images.append({
                    'src': src,
                    'alt': img.get('alt', ''),
                    'attributes': dict(img.attrs)
                })
        project['div_images'] = div_images
        
        # Get complete text for filtering/searching later
        project['full_text'] = button.get_text(separator=' | ', strip=True)
        
        return project
        
    except Exception as e:
        print(f"Error extracting project: {e}")
        return None

def main():
    print("Scraping all projects...")
    projects = scrape_all_projects()
    
    print(f"\nTotal projects scraped: {len(projects)}")
    
    if projects:
        # Save to JSON
        with open('all_projects.json', 'w', encoding='utf-8') as f:
            json.dump(projects, f, indent=2, ensure_ascii=False)
        
        print("\nSample projects with clicked content:")
        for i, project in enumerate(projects[:3]):  # Show fewer but with more detail
            print(f"\nProject {i+1} (Button {project.get('button_index', 'unknown')}):")
            print(f"Title: {project.get('title', 'N/A')}")
            print(f"Initial divs found: {project.get('initial_divs_count', 0)}")
            print(f"Target divs found after clicking: {project.get('target_divs_count', 0)}")
            
            # Show the HTML soup for each target div (sc-4576c65c-0 ffKmeh)
            for j, div_soup in enumerate(project.get('target_divs_soup', [])):
                print(f"\n--- Target Div {j+1} (sc-4576c65c-0 ffKmeh) ---")
                print("Raw HTML:")
                print(div_soup[:500] + "..." if len(div_soup) > 500 else div_soup)
                
                print(f"\nText content:")
                if j < len(project.get('target_div_texts', [])):
                    print(project['target_div_texts'][j][:300] + "..." if len(project['target_div_texts'][j]) > 300 else project['target_div_texts'][j])
                
                # Show structured details
                if j < len(project.get('target_divs_details', [])):
                    details = project['target_divs_details'][j]
                    print(f"Links found: {len(details.get('all_links', []))}")
                    print(f"Images found: {len(details.get('all_images', []))}")
                    print(f"Text elements: {len(details.get('all_text_elements', []))}")
                    
                    # Show links
                    for link in details.get('all_links', [])[:3]:  # Show first 3 links
                        print(f"  Link: {link['text']} -> {link['url']}")
            
            # Show modal content if any
            if project.get('modal_content'):
                print(f"\nModal content found: {len(project['modal_content'])} modals")
                for modal in project['modal_content']:
                    print(f"  Modal text preview: {modal['text'][:200]}...")
            
            print("-" * 80)
        
        # Filter for Germany-related projects after scraping
        german_projects = []
        search_terms = ['germany', 'german', 'deutschland', 'berlin', 'munich', 'hamburg']
        
        for project in projects:
            full_text = project.get('full_text', '').lower()
            if any(term in full_text for term in search_terms):
                german_projects.append(project)
        
        print(f"\nFound {len(german_projects)} Germany-related projects")
        
        if german_projects:
            with open('german_projects.json', 'w', encoding='utf-8') as f:
                json.dump(german_projects, f, indent=2, ensure_ascii=False)
    
    return projects

if __name__ == "__main__":
    projects = main()

Scraping all projects...
Loading page...
Scraping page 1...
Found 40 clickable project buttons on page 1
Processing button 1/40
  Found 1 target divs (sc-4576c65c-0 ffKmeh)




Processing button 2/40
  Found 1 target divs (sc-4576c65c-0 ffKmeh)
Processing button 3/40
  Found 1 target divs (sc-4576c65c-0 ffKmeh)
Processing button 4/40
  Found 1 target divs (sc-4576c65c-0 ffKmeh)
Processing button 5/40
  Found 1 target divs (sc-4576c65c-0 ffKmeh)
Processing button 6/40
  Found 1 target divs (sc-4576c65c-0 ffKmeh)
Processing button 7/40
  Found 1 target divs (sc-4576c65c-0 ffKmeh)
Processing button 8/40
  Found 1 target divs (sc-4576c65c-0 ffKmeh)
Processing button 9/40
  Found 1 target divs (sc-4576c65c-0 ffKmeh)
Processing button 10/40
  Found 1 target divs (sc-4576c65c-0 ffKmeh)
Processing button 11/40
  Found 1 target divs (sc-4576c65c-0 ffKmeh)
Processing button 12/40
  Found 1 target divs (sc-4576c65c-0 ffKmeh)
Processing button 13/40
  Found 1 target divs (sc-4576c65c-0 ffKmeh)
Processing button 14/40
  Found 1 target divs (sc-4576c65c-0 ffKmeh)
Processing button 15/40
  Found 1 target divs (sc-4576c65c-0 ffKmeh)
Processing button 16/40
  Found 1 target d

In [59]:
project_links = []

for i in range(len(projects)):
    project_links.append(projects[i]["target_divs_details"][0]["all_links"][0]["url"])

In [64]:
project_links[0]

'https://publicinterest.ai/tool/map/project/kivi-ki-vigilare'

In [63]:
df_PublicInterestAI["Quelle"] = project_links
df_PublicInterestAI.to_csv("PublicInterestAI_Projekte.csv")