### This notebook fetches the projects of EUNICoast universities from the OpenAIRE Graph API

In [36]:
import json

# Read the JSON file holding, per university, its name variants and OpenAIRE ids
with open('OpenAIRE_names.json', mode='r', encoding='utf-8') as names_file:
    openaire_uni_data = json.loads(names_file.read())

print(openaire_uni_data)

{'West Pomeranian University of Technology in Szczecin': {'names': ['West Pomeranian University of Technology, Szczecin', 'West Pomeranian University of Technology in Szczecin', 'West Pomeranian University of Technology', 'Zachodniopomorski Uniwersytet Technologiczny w Szczecinie', 'ZUT', 'WPUT'], 'openaire_id': ['openorgs____::b8aa0bcfdbbd23a45201d751f16bef7d']}, 'University Le Havre Normandie': {'names': ['Université Le Havre Normandie', 'Le Havre Normandy University', 'University of Le Havre Normandie', 'University of Le Havre', 'Université Le Havre', 'ULHN'], 'openaire_id': ['openorgs____::c6191379d983db69c888627ec029093f']}, 'EMUNI University': {'names': ['Euro-Mediterranean University', 'Euro Mediterranean University', 'Evro-sredozemska univerza', 'Université Euro-Méditerranéenne EMUNI', 'EMUNI', 'Emuni University', 'EMUNI univerza', 'Université EMUNI'], 'openaire_id': ['openorgs____::b7574109c515903acfd8c9320fe226e1']}, 'Burgas Free University': {'names': ['Burgas Free Universit

In [37]:
# Re-key the OpenAIRE data by the short custom university keys that the other
# data-fetching notebooks use.

# Custom key -> full university name as it appears in `openaire_uni_data`
custom_keys_to_full_names = {
    'PL_ZUT': 'West Pomeranian University of Technology in Szczecin',
    'BG_BFU': 'Burgas Free University',
    'GR_UOP': 'University of Patras',
    'HR_UNIDU': 'University of Dubrovnik',
    'SL_EMUNI': 'EMUNI University',
    'IT_UNISS': 'University of Sassari',
    'FR_UAG': 'University of the French West Indies',
    'PT_UAC': 'University of the Azores',
    'ES_UIB': 'University of the Balearic Islands',
    'FR_ULHN': 'University Le Havre Normandie',
    'FO_UF': 'University of the Faroe Islands',
    'DE_HOCHSTRALSUND': 'Stralsund University of Applied Sciences',
    'FI_AUAS': 'Åland University of Applied Sciences',
}

# Custom key -> first OpenAIRE id listed for that university.
# Universities whose full name is absent from the loaded data are left out.
uni_openaire_ids = {
    custom_key: openaire_uni_data[full_uni_name]['openaire_id'][0]
    for custom_key, full_uni_name in custom_keys_to_full_names.items()
    if full_uni_name in openaire_uni_data
}

print(uni_openaire_ids)

{'PL_ZUT': 'openorgs____::b8aa0bcfdbbd23a45201d751f16bef7d', 'BG_BFU': 'openorgs____::d48d5493a55f0274471fcfbd9ca2a79c', 'GR_UOP': 'openorgs____::4ac562f0376fce3539504567649cb373', 'HR_UNIDU': 'openorgs____::6821d90b67e3605a59aeb0bd9cbde980', 'SL_EMUNI': 'openorgs____::b7574109c515903acfd8c9320fe226e1', 'IT_UNISS': 'openorgs____::8d77c0a4d811f636c1be7ac7c6eca4e7', 'FR_UAG': 'openorgs____::41bca09e4d12d5742e9c03abc779edfd', 'PT_UAC': 'openorgs____::ac452db7f67aaf669dec1c432732baaf', 'ES_UIB': 'openorgs____::ec73e8190abe0bddce02cdae8fcb6814', 'FR_ULHN': 'openorgs____::c6191379d983db69c888627ec029093f', 'FO_UF': 'openorgs____::cd522f6c6c75e1a2c2ac310a421f2e33', 'DE_HOCHSTRALSUND': 'openorgs____::ab60d6f621023ebb004895aa16e9094d', 'FI_AUAS': 'openorgs____::0cc1ade24fb4433270d814dae41f82c7'}


In [38]:
import requests

# Projects endpoint of the OpenAIRE Graph API. Requests in this notebook filter
# it with the `relOrganizationId` query parameter.
# Example fetch url: https://api.openaire.eu/graph/v1/projects?relOrganizationId=openorgs____::b8aa0bcfdbbd23a45201d751f16bef7d
BASE_URL = "https://api.openaire.eu/graph/v1/projects"

# Function to fetch projects for a given OpenAIRE organization ID
def fetch_projects_for_university(openaire_id, page=1, pageSize=10):
    """
    Fetch one page of projects related to a specific OpenAIRE organization ID.

    Args:
        openaire_id: OpenAIRE organization identifier, sent to the API as the
            ``relOrganizationId`` query parameter.
        page: Page number to request (callers in this notebook start at 1).
        pageSize: Number of results requested per page.

    Returns:
        A tuple ``(results, total_results)`` where ``results`` is the value of
        the ``results`` field of the JSON response and ``total_results`` is
        ``header.numFound`` converted to ``int``.
        ``(None, None)`` is returned on any failure: network error, timeout,
        HTTP error status, undecodable JSON or an unexpected response layout.
        The failure is reported with ``print`` rather than raised, so callers
        must check for ``None``.
    """
    params = {
        "relOrganizationId": openaire_id,
        "page": page,
        "pageSize": pageSize
    }
    print(f"Fetching projects from: {BASE_URL} with params: {params}")

    # Transport-level problems (DNS failure, refused connection, timeout, ...)
    # are reported through the same (None, None) channel as HTTP errors instead
    # of aborting a long multi-university fetch loop.
    try:
        response = requests.get(BASE_URL, params=params, timeout=30)
    except requests.exceptions.RequestException as req_err:
        print(f"Request failed: {req_err}")
        return None, None

    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
        print(f"Response content: {response.content.decode(errors='ignore')}")
        return None, None

    # Ensure fetched data is in the JSON format
    content_type = response.headers.get('content-type', '')
    if 'application/json' not in content_type:
        print(f"Warning: Expected JSON response but received Content-Type: {content_type}")

    # Every JSON decode error that requests can raise derives from ValueError,
    # so catching it does not depend on which JSON backend is in use.
    try:
        data = response.json()
    except ValueError:
        print("Error: Could not decode response as JSON.")
        return None, None

    # Guard against a response that is valid JSON but lacks the expected fields.
    try:
        results = data['results']
        total_results = int(data['header']['numFound'])
    except (KeyError, TypeError, ValueError) as err:
        print(f"Error: Unexpected response structure: {err!r}")
        return None, None

    return results, total_results


# Smoke test: request the first page (default page size) for one university and
# show both the returned records and the total count reported by the API.
# `projects_on_this_page` is reused by the next cell.
projects_on_this_page, totalResults = fetch_projects_for_university(uni_openaire_ids['PL_ZUT'])
print(projects_on_this_page)
print('totalResults:',totalResults)

 

Fetching projects from: https://api.openaire.eu/graph/v1/projects with params: {'relOrganizationId': 'openorgs____::b8aa0bcfdbbd23a45201d751f16bef7d', 'page': 1, 'pageSize': 10}
totalResults: 45


In [39]:
# List the title of every project on the page fetched in the previous cell
for project in projects_on_this_page:
    print(project['title'])

New Generation of Functional Cellulose Fibre Based Packaging Materials for Sustainability
VISUAL LITERACY FOR ENGINEERING EDUCATION
SmartAqua4FuturE - SAFE
Development of film and coating products to replace conventional high volatile organic content and heavy metal filled formulations for the speciality electrically conductive coatings market
Empowering Female Engineering Entrepreneurs
Microtubular Solid Oxide Fuel Cell Power System developement and integration into a Mini-UAV
Climate change and European aquatic RESources
ScienceCity Szczecin
Researchers' Night 2009 Szczecin Time
Digital Skills Accelerator


In [40]:
# Report the total number of projects the API announces for each university.
# Only the first page is requested; its records are discarded.
for uni_key, openaire_id in uni_openaire_ids.items():
    _, numOfProjects = fetch_projects_for_university(openaire_id)
    print(uni_key, numOfProjects)

Fetching projects from: https://api.openaire.eu/graph/v1/projects with params: {'relOrganizationId': 'openorgs____::b8aa0bcfdbbd23a45201d751f16bef7d', 'page': 1, 'pageSize': 10}
PL_ZUT 45
Fetching projects from: https://api.openaire.eu/graph/v1/projects with params: {'relOrganizationId': 'openorgs____::d48d5493a55f0274471fcfbd9ca2a79c', 'page': 1, 'pageSize': 10}
BG_BFU 35
Fetching projects from: https://api.openaire.eu/graph/v1/projects with params: {'relOrganizationId': 'openorgs____::4ac562f0376fce3539504567649cb373', 'page': 1, 'pageSize': 10}
GR_UOP 402
Fetching projects from: https://api.openaire.eu/graph/v1/projects with params: {'relOrganizationId': 'openorgs____::6821d90b67e3605a59aeb0bd9cbde980', 'page': 1, 'pageSize': 10}
HR_UNIDU 40
Fetching projects from: https://api.openaire.eu/graph/v1/projects with params: {'relOrganizationId': 'openorgs____::b7574109c515903acfd8c9320fe226e1', 'page': 1, 'pageSize': 10}
SL_EMUNI 12
Fetching projects from: https://api.openaire.eu/graph/v

In [41]:
from math import floor 
# --- Main Loop for Fetching Data for All Universities ---
# For each university the first page is fetched to learn the total number of
# projects, then the remaining pages are requested one by one. The collected
# records are stored under the university's custom key.
all_universities_projects = {}
page_size_for_fetching = 50

for uni_key in uni_openaire_ids:
    print(
        f"\n--- Starting fetch for {uni_key} (OpenAIRE ID: {uni_openaire_ids[uni_key]}) ---")
    current_uni_projects = []
    current_page = 1

    projects_on_first_page, number_of_results = fetch_projects_for_university(
        uni_openaire_ids[uni_key], page=current_page, pageSize=page_size_for_fetching)

    if projects_on_first_page is None:
        print(f"Failed to fetch initial data for {uni_key}. Skipping this university.")
        all_universities_projects[uni_key] = [] # Store empty list
        continue # Move to the next university

    if number_of_results == 0:
        print(f"No projects found by API for {uni_key}.")
        all_universities_projects[uni_key] = []
        continue

    print(f"API reports {number_of_results} total projects for {uni_key}.")

    current_uni_projects.extend(projects_on_first_page)

    # Ceiling division gives the exact number of pages. Rounding the quotient
    # down and using it as the count of *extra* pages requests one superfluous
    # page whenever the total is an exact multiple of the page size.
    total_pages = (number_of_results + page_size_for_fetching - 1) // page_size_for_fetching

    # Page 1 is already stored, so continue from page 2.
    for current_page in range(2, total_pages + 1):
        print('current_page',current_page)
        projects_on_this_page, _ = fetch_projects_for_university(
            uni_openaire_ids[uni_key], page=current_page, pageSize=page_size_for_fetching)
        if projects_on_this_page is None:
            # The fetch helper signals failure with None; extending a list with
            # None would raise TypeError and abort the whole run.
            print(
                f"Failed to fetch page {current_page} for {uni_key}. "
                f"Keeping the {len(current_uni_projects)} projects fetched so far.")
            break
        current_uni_projects.extend(projects_on_this_page)
    print("LEN:",len(current_uni_projects))
    all_universities_projects[uni_key] = current_uni_projects



--- Starting fetch for PL_ZUT (OpenAIRE ID: openorgs____::b8aa0bcfdbbd23a45201d751f16bef7d) ---
Fetching projects from: https://api.openaire.eu/graph/v1/projects with params: {'relOrganizationId': 'openorgs____::b8aa0bcfdbbd23a45201d751f16bef7d', 'page': 1, 'pageSize': 50}
API reports 45 total projects for PL_ZUT.
LEN: 45

--- Starting fetch for BG_BFU (OpenAIRE ID: openorgs____::d48d5493a55f0274471fcfbd9ca2a79c) ---
Fetching projects from: https://api.openaire.eu/graph/v1/projects with params: {'relOrganizationId': 'openorgs____::d48d5493a55f0274471fcfbd9ca2a79c', 'page': 1, 'pageSize': 50}
API reports 35 total projects for BG_BFU.
LEN: 35

--- Starting fetch for GR_UOP (OpenAIRE ID: openorgs____::4ac562f0376fce3539504567649cb373) ---
Fetching projects from: https://api.openaire.eu/graph/v1/projects with params: {'relOrganizationId': 'openorgs____::4ac562f0376fce3539504567649cb373', 'page': 1, 'pageSize': 50}
API reports 402 total projects for GR_UOP.
current_page 2
Fetching projects

In [42]:
# Dump the complete mapping of university key -> list of fetched project records
print(all_universities_projects)



In [43]:
import pandas as pd

# Flatten the per-university lists into a single list of records. Each record
# is a shallow copy of the project document with the university key added, so
# the documents stored in `all_universities_projects` are not modified.
# Entries that are not lists / documents that are not dicts are ignored.
all_projects_list = [
    {**project_doc, 'university_key': university_key}
    for university_key, projects_list in all_universities_projects.items()
    if isinstance(projects_list, list)
    for project_doc in projects_list
    if isinstance(project_doc, dict)
]

# Convert the list of dictionaries to a Pandas DataFrame
if all_projects_list:
    df = pd.DataFrame(all_projects_list)
    print(df.head())
    # Define the output file path
    parquet_file_path = 'university_projects.parquet'

    try:
        # Save the DataFrame to a Parquet file
        # `index=False` prevents writing the DataFrame index as a column
        # `engine='pyarrow'` is common, 'fastparquet' is another option
        df.to_parquet(parquet_file_path, index=False, engine='pyarrow')
        # One row per (university, project) pair: a project listed for several
        # universities is counted once for each of them.
        print(
            f"Successfully saved data for {len(df)} projects to {parquet_file_path}")

    except Exception as e:
        # Deliberately broad: a failed export is reported, not raised, so the
        # in-memory DataFrame stays available in the notebook.
        print(f"Error saving data to Parquet: {e}")
else:
    print("No project data was collected to save.")



                                               id                      code  \
0  corda_______::bfc079dba34c7caf247c0518e89232e4                    290098   
1  erasmusplus_::6f571366ff2a7ba6038e1eaf68871581  2019-1-PL01-KA202-065157   
2  corda_____he::f92bfce1bb4183f66d6b4d6925fdfe2a                 101084549   
3  corda_______::dce7a795c11e7123a6a68b5a6f2354f2                    285908   
4  erasmusplus_::81fbb2fe7053750d1aa372486c5f08bb  2018-1-PL01-KA202-050847   

     acronym                                              title websiteUrl  \
0  NEWGENPAK  New Generation of Functional Cellulose Fibre B...       None   
1       None          VISUAL LITERACY FOR ENGINEERING EDUCATION       None   
2       SAFE                            SmartAqua4FuturE - SAFE       None   
3  TRANSCOND  Development of film and coating products to re...       None   
4       None        Empowering Female Engineering Entrepreneurs       None   

    startDate     endDate                 callIdentifier