In [58]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def _first_text(node, selector):
    """Return the stripped text of the first match of `selector` under `node`, or None."""
    match = node.select_one(selector)
    return match.text.strip() if match is not None else None


def scrape_elements(url, description_selector, artist_selector, title_selector, date_selector,
                    timeout=10, container_selector=None):
    """Scrape titles, descriptions, artists and dates from a web page into a DataFrame.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    description_selector, artist_selector, title_selector, date_selector : str
        CSS selectors locating the respective field.
    timeout : float or tuple, optional
        Passed to ``requests.get`` so an unresponsive server cannot block the
        call forever. Defaults to 10.
    container_selector : str, optional
        CSS selector matching one element per entry. When given, each field
        selector is evaluated *inside* each matched container and its first
        match is used, so every row holds values from the same entry. When
        omitted (the default), each field selector is evaluated across the
        whole page and the results are paired purely by position; if some
        entry lacks a field, the values that follow move up into the wrong row.

    Returns
    -------
    pandas.DataFrame or None
        A frame with the columns "Title", "Description", "Artist" and "Date"
        (missing values are None), or None if the response status is not 200,
        in which case a message is printed.
    """
    # A timeout keeps the notebook cell from hanging on an unresponsive server
    response = requests.get(url, timeout=timeout)

    # Preserve the original best-effort contract: report and return None
    if response.status_code != 200:
        print("Failed to fetch the webpage.")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Selector order matches the column order of the resulting DataFrame
    selectors = (title_selector, description_selector, artist_selector, date_selector)

    if container_selector is not None:
        # Per-entry extraction: fields stay aligned even when one is missing
        data_list = [
            tuple(_first_text(container, selector) for selector in selectors)
            for container in soup.select(container_selector)
        ]
    else:
        # Page-wide extraction paired by index; shorter lists are padded with None
        field_texts = [
            [element.text.strip() for element in soup.select(selector)]
            for selector in selectors
        ]
        row_count = max(len(texts) for texts in field_texts)
        data_list = [
            tuple(texts[i] if i < len(texts) else None for texts in field_texts)
            for i in range(row_count)
        ]

    return pd.DataFrame(data_list, columns=["Title", "Description", "Artist", "Date"])

if __name__ == "__main__":
    # Page to scrape; swap in a different address as needed
    url = 'https://co-berlin.org/de/programm/ausstellungen'

    # CSS selectors for each field; adjust them to the markup of the target page
    description_selector = "div.layout__region.layout__region--second > div > div > article > div.field.field--name-field-caption.field--type-string.field--label-hidden.field__item"
    artist_selector = "div.block.block-layout-builder.block-field-blocknodeexhibitiontitle > span"
    title_selector = "div.block.block-layout-builder.block-field-blocknodeexhibitionfield-subtitle > div"
    date_selector = "div.block.block-layout-builder.block-field-blocknodeexhibitionfield-date-and-time > div > span"

    # Run the scraper; the result is kept in `scraped_df` for use in later cells
    scraped_df = scrape_elements(
        url,
        description_selector=description_selector,
        artist_selector=artist_selector,
        title_selector=title_selector,
        date_selector=date_selector,
    )

    # Show the scraped table as plain text
    print(scraped_df)


                  Title                                        Description  \
0           Poltergeist  Farah Al Qasimi, 'Hand print', 2021 © Farah Al...   
1         Retrospective  Jochen Lempert, 'Untitled (Plastic Bag II)', 2...   
2  Lingering Sensations                                               None   
3            Encounters                                               None   
4                  None                                               None   

            Artist                         Date  
0  Farah Al Qasimi        13. Mai – 6. Sep 2023  
1   Daido Moriyama        13. Mai – 6. Sep 2023  
2   Jochen Lempert        13. Mai – 6. Sep 2023  
3    Image Ecology  16. Sep 2023 – 18. Jan 2024  
4  Mary Ellen Mark  16. Sep 2023 – 18. Jan 2024  


In [59]:
scraped_df

Unnamed: 0,Title,Description,Artist,Date
0,Poltergeist,"Farah Al Qasimi, 'Hand print', 2021 © Farah Al...",Farah Al Qasimi,13. Mai – 6. Sep 2023
1,Retrospective,"Jochen Lempert, 'Untitled (Plastic Bag II)', 2...",Daido Moriyama,13. Mai – 6. Sep 2023
2,Lingering Sensations,,Jochen Lempert,13. Mai – 6. Sep 2023
3,Encounters,,Image Ecology,16. Sep 2023 – 18. Jan 2024
4,,,Mary Ellen Mark,16. Sep 2023 – 18. Jan 2024
