In [None]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Column layout of the DataFrame returned by scrape_cordis.
_COLUMNS = ['Project Name', 'Participant', 'Country', 'Funding', 'Status']

# Seconds to wait on a request before giving up, so one stalled server
# cannot block the whole run.
_REQUEST_TIMEOUT = 30


def _parse_funding(text):
    """Convert a funding string such as '€ 1 234,50' to a float.

    The euro sign and every whitespace character (including non-breaking
    spaces) are removed and the decimal comma is turned into a dot.

    Raises:
        ValueError: if what remains is not a valid number.
    """
    cleaned = ''.join(text.replace('€', '').split()).replace(',', '.')
    return float(cleaned)


def _div_text(parent, class_name):
    """Return the stripped text of the first <div> with *class_name* under *parent*.

    Raises:
        ValueError: if no such <div> exists, so the caller reports which
            element is missing instead of an opaque AttributeError on None.
    """
    node = parent.find('div', class_=class_name)
    if node is None:
        raise ValueError(f"no <div> with class '{class_name}' found")
    return node.text.strip()


def scrape_cordis(urls):
    """Scrape coordinator and participant funding from project pages.

    Each URL is fetched with ``requests`` and parsed with BeautifulSoup.
    One 'COORDINATOR' row (with an empty country) is produced per project,
    followed by one 'PARTICIPANT' row per entry of the participants list.

    Failures never abort the run: a URL that cannot be fetched, returns a
    non-200 status, or cannot be parsed is reported with ``print`` and
    skipped. Rows collected for a project before a parsing error occurred
    are kept.

    Args:
        urls: iterable of project page URLs.

    Returns:
        pandas.DataFrame with the columns 'Project Name', 'Participant',
        'Country', 'Funding' and 'Status'; empty if nothing was scraped.
    """
    rows = []

    for url in urls:
        # Only the network call sits in this try, so a fetch failure is
        # reported against the URL (no project name is known yet).
        try:
            response = requests.get(url, timeout=_REQUEST_TIMEOUT)
        except requests.exceptions.RequestException as e:
            print(f"An error occurred while fetching {url}: {e}")
            continue

        if response.status_code != 200:
            print(f"Error fetching URL: {url}. Status code: {response.status_code}")
            continue

        # Used in error messages; falls back to the URL until the project
        # name has actually been extracted from the page.
        label = url
        try:
            soup = BeautifulSoup(response.text, 'html.parser')

            # Extract the project name (adjust the tag and class as needed)
            project_name = _div_text(soup, 'c-project-info__acronym')
            label = project_name

            # Extract the coordinator section
            coordinator_section = soup.find('div', class_='c-organisation c-part-info')
            if coordinator_section is None:
                raise ValueError("coordinator section not found")
            coordinator_name = _div_text(coordinator_section, 'c-part-info__title')
            coordinator_funding = _parse_funding(
                _div_text(coordinator_section, 'c-part-info__content')
            )
            # The coordinator's country is not extracted; it is left empty.
            rows.append([project_name, coordinator_name, '', coordinator_funding, 'COORDINATOR'])

            # Extract the participants section
            participants_section = soup.find('div', class_='c-organizations-list')
            if participants_section is None:
                print(f"No participants found for {project_name}. Skipping...")
                continue

            for participant_div in participants_section.find_all(
                'div', class_='c-organizations-list__item'
            ):
                # The organization name is read from the item's data-title attribute
                name = participant_div['data-title']
                country = _div_text(participant_div, 'c-part-info__country-alt')
                funding_amount = _parse_funding(
                    _div_text(participant_div, 'c-part-info__content')
                )
                rows.append([project_name, name, country, funding_amount, 'PARTICIPANT'])
        except Exception as e:
            # Deliberately broad: one malformed page must not stop the
            # remaining URLs from being scraped.
            print(f"An error occurred while processing {label}: {e}. Skipping...")

    # Build the frame once instead of growing it row by row.
    return pd.DataFrame(rows, columns=_COLUMNS)

# Project pages to scrape, processed in the order listed.
urls = [
    "https://cordis.europa.eu/project/id/833115",
    "https://cordis.europa.eu/project/id/786894",
    "https://cordis.europa.eu/project/id/711264",
    "https://cordis.europa.eu/project/id/740723",
    "https://cordis.europa.eu/project/id/787061",
    "https://cordis.europa.eu/project/id/700367",
    "https://cordis.europa.eu/project/id/786748",
    "https://cordis.europa.eu/project/id/786629",
    "https://cordis.europa.eu/project/id/786731",
    "https://cordis.europa.eu/project/id/786993",
    "https://cordis.europa.eu/project/id/740754",
    "https://cordis.europa.eu/project/id/740688",
    "https://cordis.europa.eu/project/id/740558",
    "https://cordis.europa.eu/project/id/786687",
    "https://cordis.europa.eu/project/id/740543",
    "https://cordis.europa.eu/project/id/740466",
    "https://cordis.europa.eu/project/id/787100",
    "https://cordis.europa.eu/project/id/833464",
    "https://cordis.europa.eu/project/id/833635",
    "https://cordis.europa.eu/project/id/833444",
    "https://cordis.europa.eu/project/id/786767",
    "https://cordis.europa.eu/project/id/653879",
    "https://cordis.europa.eu/project/id/833881",
    "https://cordis.europa.eu/project/id/700381",
    "https://cordis.europa.eu/project/id/740593",
    "https://cordis.europa.eu/project/id/833650",
    "https://cordis.europa.eu/project/id/740898",
    "https://cordis.europa.eu/project/id/700378",
    "https://cordis.europa.eu/project/id/700510",
    "https://cordis.europa.eu/project/id/786727",
    "https://cordis.europa.eu/project/id/700326",
    "https://cordis.europa.eu/project/id/699824",
    "https://cordis.europa.eu/project/id/700688",
    "https://cordis.europa.eu/project/id/833276",
    "https://cordis.europa.eu/project/id/653587",
    "https://cordis.europa.eu/project/id/767542",
    "https://cordis.europa.eu/project/id/700281",
    "https://cordis.europa.eu/project/id/786729",
    "https://cordis.europa.eu/project/id/653350",
    "https://cordis.europa.eu/project/id/833672",
    "https://cordis.europa.eu/project/id/740934",
    "https://cordis.europa.eu/project/id/740610",
    "https://cordis.europa.eu/project/id/700621",
    "https://cordis.europa.eu/project/id/806097",
    "https://cordis.europa.eu/project/id/647850",
    "https://cordis.europa.eu/project/id/700626",
    "https://cordis.europa.eu/project/id/720417",
    "https://cordis.europa.eu/project/id/787128",
    "https://cordis.europa.eu/project/id/883297",
    "https://cordis.europa.eu/project/id/832735",
    "https://cordis.europa.eu/project/id/786913",
    "https://cordis.europa.eu/project/id/740859",
    "https://cordis.europa.eu/project/id/740560",
    "https://cordis.europa.eu/project/id/787111",
    "https://cordis.europa.eu/project/id/101022001",
    "https://cordis.europa.eu/project/id/883596",
    "https://cordis.europa.eu/project/id/883293",
    "https://cordis.europa.eu/project/id/786945",
    "https://cordis.europa.eu/project/id/740580",
    "https://cordis.europa.eu/project/id/740072",
    "https://cordis.europa.eu/project/id/740773",
    "https://cordis.europa.eu/project/id/787031",
    "https://cordis.europa.eu/project/id/832800",
]

# Call the function to scrape the data
result_df = scrape_cordis(urls)

# Write the result to a CSV file (without the index column)
result_df.to_csv('results.csv', index=False)

print("Scraping completed and results saved to results.csv.")

# Display the result DataFrame. This must stay the final expression:
# a notebook cell only renders the value of its last expression, so
# placing it before the print above would display nothing.
result_df


No participants found for SURVEIRON. Skipping...
