# In [20]:
import pandas as pd
import numpy as np
import time
import re as re
from selenium import webdriver
import translators as ts
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


# Module-level Chrome WebDriver session. get_data_trygfond() below reads this
# global `driver` directly instead of taking it as an argument, so it must be
# created before that function is called.
driver = webdriver.Chrome()
# Absolute locators below were copied from browser devtools; they are tied to
# the page's current markup and will need updating if the site layout changes.
_DONATIONS_TAB_XPATH = "/html/body/div[3]/main/div/div[2]/div[1]/div/div/a[4]"
_RESULT_LIST_XPATH = "/html/body/div[3]/main/div/div[2]/div[2]/div/ul"
_NEXT_PAGE_CSS = (
    "#main > div > div.c-listing.c-project-listing > "
    "div.c-project-listing__main > div > div.c-pagination > "
    "button.c-pagination__button.c-link-icon--button.c-link-icon."
    "c-link-icon--right > svg"
)

# (DataFrame column, XPath relative to one <li> entry), in the order the
# fields are read from the page.
_FIELD_XPATHS = (
    ("Project Category", "/div/div[2]/div"),
    ("Description", "/div/div[2]/h3"),
    ("Reciever", "/div/div[2]/p"),
    ("Grant size (DKK)", "/div/div[3]/div/p"),
    ("Year", "/div/div[3]/div/time"),
)

# Column order of the returned DataFrame. The "Reciever" spelling is kept
# because it is part of the output schema written to the CSV files.
_COLUMNS = ["Year", "Reciever", "Grant size (DKK)", "Description", "Project Category"]

# Entries are read starting at li[2] of the result list.
_FIRST_ITEM_INDEX = 2


def _hover(by, locator):
    """Move the mouse pointer onto the element found by ``(by, locator)``.

    Raises NoSuchElementException if the element is not in the page.
    """
    # A fresh ActionChains per move, so no previously queued action can be
    # replayed regardless of how the installed Selenium version handles reuse.
    ActionChains(driver).move_to_element(driver.find_element(by, locator)).perform()


def _click_when_clickable(by, locator, timeout=10):
    """Hover the element, wait up to *timeout* seconds until clickable, click it."""
    _hover(by, locator)
    WebDriverWait(driver, timeout).until(
        EC.element_to_be_clickable((by, locator))
    ).click()


def _text_or_na(xpath):
    """Return the text of the element at *xpath*, or "NA" if it is missing."""
    try:
        return driver.find_element(By.XPATH, xpath).text
    except NoSuchElementException:
        return "NA"


def _read_item(item_xpath):
    """Read every field of one list entry into a ``{column: text}`` dict."""
    return {
        column: _text_or_na(item_xpath + relative_xpath)
        for column, relative_xpath in _FIELD_XPATHS
    }


def get_data_trygfond(url, max_pages=240, items_per_page=30):
    """Scrape grant entries from the Tryg foundation project database.

    Opens *url* in the module-level ``driver``, switches to the donations
    tab, and then walks the paginated result list. For every list entry it
    reads category, description, receiver/location, amount and year; a field
    whose element is missing is recorded as "NA".

    Scraping stops early, keeping everything collected so far, when an
    expected element is missing (for example a page with fewer entries) or
    when a button does not become clickable within the wait timeout.

    Note: when the browser window opens, the user has to choose a search
    engine if prompted and close the cookie pop-up by hand during the initial
    pause, otherwise the extraction cannot start.

    Args:
        url: Address of the listing page, e.g.
            https://www.tryghed.dk/saadan-stoetter-vi/projekter-og-donationer?type=projekter-og-donationer
        max_pages: Maximum number of result pages to visit.
        items_per_page: Number of entries read from each page before the
            "next page" button is clicked.

    Returns:
        pandas.DataFrame with the columns "Year", "Reciever",
        "Grant size (DKK)", "Description" and "Project Category", one row per
        scraped entry (empty if nothing could be read).
    """
    # Imported locally because the file's import block only brings in
    # NoSuchElementException.
    from selenium.common.exceptions import TimeoutException

    rows = []
    item_index = None  # stays None if we fail before reaching any entry
    last_item_index = _FIRST_ITEM_INDEX + items_per_page - 1

    driver.get(url)
    driver.maximize_window()  # full-size window
    time.sleep(5)  # initial load; also the window for closing the cookie pop-up

    try:
        # Switch the listing to the donations tab.
        _click_when_clickable(By.XPATH, _DONATIONS_TAB_XPATH)

        for _ in range(max_pages):
            time.sleep(5)  # let the (re)loaded list render

            # Hover the first entry; raises if the list is absent or empty.
            _hover(By.XPATH, f"{_RESULT_LIST_XPATH}/li[{_FIRST_ITEM_INDEX}]")

            for item_index in range(_FIRST_ITEM_INDEX, last_item_index + 1):
                item_xpath = f"{_RESULT_LIST_XPATH}/li[{item_index}]"
                # A missing <li> (short page) ends the whole scrape here.
                _hover(By.XPATH, item_xpath)
                rows.append(_read_item(item_xpath))

            # Page fully read: advance to the next one.
            _click_when_clickable(By.CSS_SELECTOR, _NEXT_PAGE_CSS)
            time.sleep(7)

    except (NoSuchElementException, TimeoutException) as exc:
        # Expected way out of the loop. Only report values that actually
        # exist, so a failure before the first entry cannot raise here.
        print("Loop ended")
        print(f"{type(exc).__name__} at list item {item_index}")
        if rows:
            for column, _ in _FIELD_XPATHS:
                print(rows[-1][column])

    return pd.DataFrame(rows, columns=_COLUMNS)

# Run the scrape and persist the result twice: once comma-separated and once
# semicolon-separated.
# NOTE(review): the module-level `driver` is still open after this; call
# driver.quit() once nothing later in the notebook needs the browser.
tryg_listing_url = "https://www.tryghed.dk/saadan-stoetter-vi/projekter-og-donationer?type=projekter-og-donationer"
biglist = get_data_trygfond(tryg_listing_url)

# Show every column when the frame is displayed.
pd.set_option('display.max_columns', None)

for csv_name, csv_options in (
    ('trygfonden_data.csv', {}),
    ('trygfonden_sepped_data.csv', {"sep": ";"}),
):
    biglist.to_csv(csv_name, index=True, **csv_options)