In [1]:
from bs4 import BeautifulSoup
import requests
import multiprocessing
import numpy as np
import pandas as pd
from ast import literal_eval
from gensim import corpora, models, similarities
from fuzzywuzzy import fuzz
from nltk.tokenize import sent_tokenize, word_tokenize
from stop_words import get_stop_words
from random import shuffle
import stop_words
from gensim.parsing.porter import PorterStemmer
import time
import re
import os
import pylab as pl
from ipywidgets import FloatProgress
from IPython import display
import matplotlib.pyplot as plt

In [2]:
def get_urls_from_url(main_url):
    resp = requests.get(main_url)
    soup = BeautifulSoup(resp.content, 'html.parser')
    urls = []
    links = soup.find_all('a')
    for url in links:
        try:
            url = url.attrs['href']
            if len(url) > 5:
                urls.append(url)
        except:
            pass
    return urls

def get_texts_from_resp(resp):
    # parse the web response
    soup = BeautifulSoup(resp.content, 'html.parser')
    # find and filter texts
    print("These are texts under", resp.url)
    texts = soup.find_all('p')
    print("number of items grabed are", len(texts))
    texts = [text for text in texts if len(text.text) > 100]
    print("number of items after filtering", len(texts))
    # output texts
    for text in texts:
        #print(text.text)
        yield text.text

def url_is_valid(url):
    try:
        resp = requests.get(url, timeout=10)
        assert resp.status_code == 200
        return resp
    except:
        return False

def url_compare(url1, url2, thresh=70):
    """
    Based on the similarity between roots of two url, return whether these two url are smiliary or not
    """
    # extract pattern in () http(s)://()/???/???
    url1 = re.sub("(https?://)?", "", url1).split('/')[0]
    url2 = re.sub("(https?://)?", "", url2).split('/')[0]
    
    # find similarity between roots
    root_sim = fuzz.partial_ratio(url1, url2)
    
    if root_sim >= thresh:
        return True
    else:
        print(url1, " and ", url2, " may not be relevent")
        return False

def get_text_from_url_with_check(url, main_url):
    """
    The bottom function that extract text from url
    """
    # avoid url ends with .pdf
    if url.split(".")[-1] == "pdf":
        return []
    
    # check if url is valid
    resp = url_is_valid(url)
    # if the url is not valid, it is possible that it is in the form of 
    if not resp:
        if not "http" in url:
            url = main_url + url
            resp = url_is_valid(url)
            if not resp:
                print("url:", url, "invalid")
                return []
        else:
            print("url:", url, "invalid")
            return []
        
    # double check if the url is actually visited
    if resp.url != url: # meaning its redirected, which means an error happened
        # in many cases, the redirection is due to website has prefix https instead of http
        url = url[:4] + 's' + url[4:]
        resp = url_is_valid(url)
        if resp:
            if resp.url == url:
                print('try succeeded')
        else:
            return []
        
    # check if url is the child or sibling of main_url
    # sometimes, the url is directed to same irrelevent sites such as www.twitter.com etc.
    if not url_compare(main_url, resp.url):
        return []
    
    # get text from url
    text_data = []
    for text in get_texts_from_resp(resp):
        text_data.append(text)
    return text_data

def get_text_from_url_and_its_children(main_url):
    """
    Parallalize the text extraction process from given main url
    """
    # preprocess main url
    # remove space in url
    main_url = main_url.replace(" ", "")
    # force https:// prefix to the main url
    main_url = "https://" + re.sub("(https?://)?", "", main_url)
    # remove last "/" if there is one
    if main_url[-1] == "/":
        main_url = main_url[:-1]
    
    print("starting to crawl main url: ", main_url)
    
    # check validity of main_url
    resp = url_is_valid(main_url)
    if not resp:
        print("main_url: ", main_url, " is not valid")
        return "Main site not accessible"

    # grab all urls in this web page
    urls = [main_url]
    urls.extend(get_urls_from_url(main_url))
    # remove duplicated urls
    urls = list(set(urls)) 
    print("\n\nthese are the children links we crawled")
    print(urls, "\n")
    # grab all texts in each urls asynchronously
    # argmumentize urls
    urls = [(url, main_url) for url in urls]
    with multiprocessing.Pool(processes=24) as pool:
        text_data = pool.starmap(get_text_from_url_with_check, urls) 
    
    # collect output text data
    text_data = [text for text in text_data if len(text_data) > 0] # remove empty returns
    text_data = [text for text_list in text_data for text in text_list] # get list elements to str
    return " ".join(text_data)

In [3]:
database = pd.read_csv('../Examples/database.csv').iloc[:, [1, 5]]
database = database.dropna()
database['Crawled'] = np.vectorize(get_text_from_url_and_its_children)(database["Website"])

starting to crawl main url:  https://www.aecfafrica.org


these are the children links we crawled
['/portfolio/competitions', '\n\n\n\n\n/portfolio/aecf_gender_lens_investment\n\n\n', 'https://www.aecfafrica.org', '/portfolio/aecf-connect', '/media-centre/videos', '/node/326', '\n\n\n\n\n/about-us/who-we-are\n\n\n', '\n\n\n\n\n/portfolio/renewable-energy\n\n\n', '/node/299', '/portfolio/aecf_gender_lens_investment', '/portfolio/agribusiness', '/media-centre/blog', '/portfolio/overview', 'https://www.linkedin.com/company/africa-enterprise-challenge-fund', 'tel:+254203675394', '/work-with-us', 'https://twitter.com/AecfAfrica', '/about-us/funding-partners', '/about-us/strategic-partners', '/portfolio/renewable_energy/react_ssa', 'tel:+254703033394', '/node/337', '/knowledge-hub', '/pt-pt', '\n\n\n\n\n/about-us/funding-partners\n\n\n', '\n\n\n\n\nmailto:seedsforimpact@aecfafrica.org\n\n\n', '/about-us/The-AECF-Board', '/portfolio/renewable-energy/REACT-EEP', '/portfolio/renewable-energy', 

url: https://www.aecfafrica.org




mailto:seedsforimpact@aecfafrica.org


 invalid
 invalid
url: https://www.linkedin.com/company/africa-enterprise-challenge-fund invalid
These are texts under https://www.aecfafrica.org/portfolio/renewable-energy
number of items grabed are 5
number of items after filtering 2
url: http://www.parioagency.com invalid
These are texts under https://www.aecfafrica.org/portfolio/renewable-energy/REACT-EEP
number of items grabed are 16
number of items after filtering 6
These are texts under https://www.aecfafrica.org/contact
number of items grabed are 1
number of items after filtering 0
These are texts under https://www.aecfafrica.org/contact-us
number of items grabed are 4
number of items after filtering 0
These are texts under https://www.aecfafrica.org/node/337
number of items grabed are 2
number of items after filtering 0
www.aecfafrica.org  and  twitter.com  may not be relevent
These are texts under https://www.aecfafrica.org/node/329
number of items gra

number of items grabed are 39
number of items after filtering 34
acumen.org  and  www.facebook.com  may not be relevent
These are texts under https://acumen.org/contact/
number of items grabed are 34
number of items after filtering 1
acumen.org  and  twitter.com  may not be relevent
These are texts under https://acumen.org/partners/
number of items grabed are 16
number of items after filtering 7
These are texts under https://acumen.org/reports/
number of items grabed are 15
number of items after filtering 7
acumen.org  and  www.youtube.com  may not be relevent
These are texts under https://www.plusacumen.org/courses/social-entrepreneurship-101
number of items grabed are 6
number of items after filtering 3
These are texts under https://acumen.org/anti-corruption-policy/
number of items grabed are 16
number of items after filtering 5
try succeeded
These are texts under https://acumen.org/work-with-us/
number of items grabed are 10
number of items after filtering 4
These are texts under h

url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/search/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/projects-and-operations/project-cycle/project-identification/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/topics-and-sectors/initiatives-partnerships/microfinance-multidonor-trust-fund/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/topics-and-sectors/topics/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/countries/central-africa/central-african-republic/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/about-us/mission-strategy/context/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/knowledge/statistics/ invalid
url: https://ww

url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/documents/board-documents/board-of-governors-documents/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/projects-and-operations/financial-products/african-development-bank/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/documents/publications/mdg-report/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/topics-and-sectors/initiatives-partnerships/african-peer-review-mechanism/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/countries/east-africa/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/topics-and-sectors/topics/employment/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/topics-and-s

url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/knowledge/publications/tracking-africa%e2%80%99s-progress-in-figures/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/countries/southern-africa/botswana/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/topics-and-sectors/initiatives-partnerships/power-africa-initiative/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/about-us/mission-strategy/objectives/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/countries/west-africa/senegal/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/documents/knowledge/annual-meetings-seminars/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/countries/southern

url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/countries/west-africa/guinea/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/cookies-privacy-policy/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/documents/publications/economic-briefs/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/topics-and-sectors/initiatives-partnerships/global-environment-facility-gef/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/projects-and-operations/project-cycle/project-preparation/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/documents/evaluation-reports/annual-reports-on-evaluation/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/knowledge/publicati

url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/news-and-events/african-development-bank-is-key-to-africas-economic-development-say-southern-african-governors-18003/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/documents/budget-documents/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/documents/publications/gender-poverty-and-environmental-indicators-on-african-countries/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/topics-and-sectors/sectors/climate-change/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/countries/east-africa/uganda/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/documents/corporate-procurement/requests-for-proposals-rfp/ invalid
url: https://www.afdb.org/en/abo

url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/countries/west-africa/cabo-verde/ invalid
These are texts under http://projectsportal.afdb.org/dataportal/?lang=en
number of items grabed are 0
number of items after filtering 0
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/topics-and-sectors/initiatives-partnerships/extractive-industries-transparency-initiative/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/topics-and-sectors/initiatives-partnerships/multi-donor-governance-trust-fund/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/knowledge/statistics/africa-information-highway-aih/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/topics-and-sectors/initiatives-partnerships/green-growth-initiative/ invalid
These are texts under https://esa.afdb

url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/projects-and-operations/procurement/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/projects-and-operations/financial-management/financial-management-policies-procedures/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/documents/evaluation-reports/country-sector-reviews-and-case-studies/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/knowledge/statistics/statistical-capacity-building/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/rss-feeds/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/topics-and-sectors/topics/quality-assurance-results/development-effectiveness-reviews/ invalid
url: https://www.afdb.org/en/about-us/corporate-informat

url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/countries/southern-africa/zimbabwe/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/documents/legal-documents/loan-and-grant-conditions/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/news-and-events/5-milliards-de-francs-cfa-de-la-banque-africaine-de-developpement-pour-insuffler-une-nouvelle-dynamique-au-centre-regional-de-formation-en-entretien-routier-cerfer-18440/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/documents/project-operations/annual-portfolio-performance-review-appr/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/topics-and-sectors/initiatives-partnerships/african-water-facility/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-

url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/topics-and-sectors/initiatives-partnerships/africa50/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/about-us/organisational-structure/ethics-office/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/documents/publications/national-strategy-for-the-development-of-statistics-nsds/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/topics-and-sectors/initiatives-partnerships/sustainable-energy-for-all-se4all/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/countries/west-africa/burkina-faso/ invalid
url: https://www.afdb.org/en/about-us/corporate-information/african-development-fund-adf/en/topics-and-sectors/initiatives-partnerships/africa-trade-fund/ invalid
url: https://www.afdb.org/en/about-us/

url: https://www.lgtvp.com/en/en/detailnews/New-report-on-state-of-oral-healthcare-for-urban-low-income-households-in-India/ invalid
These are texts under https://www.lgtvp.com/en/portfolio/portfolio-overview/
number of items grabed are 27
number of items after filtering 8
url: https://www.lgtvp.com/en/en/fellowship/about-the-fellowship/preparation/ invalid
url: https://www.lgtvp.com/en/en/detailnews/m2m-First-Early-Childhood-Development-Social-Impact-Bond-is-launched-in-South-Africa/ invalid
url: https://www.lgtvp.com/en/en/detailnews/Educate-Girls-Worlds-first-development-impact-bond-in-education-surpassed-target-outcomes/ invalid
url: https://www.lgtvp.com/en/en/what-we-do/approach/ invalid
url: https://www.lgtvp.com/en/en/detailnews/Breakfast-event-on-Capacity-Building-Support/ invalid
url: https://www.lgtvp.com/en/en/fellowship/partners/ invalid
url: https://www.lgtvp.com/en/en/fellowship/impact-career/ invalid
url: https://www.lgtvp.com/en/en/what-we-do/impact/ invalid
url: https

number of items after filtering 16
number of items after filtering 23
These are texts under http://www.grofin.com/language/en/about_grofin/#section2_whyweexist
These are texts under http://www.grofin.com/language/en/entrepreneurs/#section1_grofinmorethanfinance
number of items grabed are 40
number of items grabed are 32
number of items after filtering 23
number of items after filtering 16
These are texts under http://www.grofin.com/language/en/entrepreneurs/#section8_stillhavequestions
number of items grabed are 32
number of items after filtering 16
These are texts under http://www.grofin.com/language/en/entrepreneurs/#section7_grofinclientsuccess
number of items grabed are 32
number of items after filtering 16
These are texts under http://www.grofin.com/language/en/about_grofin/#section1_whoweare
number of items grabed are 40
number of items after filtering 23
These are texts under http://www.grofin.com/language/en/business_support/#section4_businesssupportsuccess
number of items grab

These are texts under http://idev.afdb.org/
number of items grabed are 15
number of items after filtering 5
These are texts under https://www.afdb.org/en/knowledge/publications/tracking-africa%e2%80%99s-progress-in-figures/
number of items grabed are 3
number of items after filtering 1
These are texts under https://www.afdb.org/en/topics-and-sectors/topics/millennium-development-goals-mdgs/
number of items grabed are 24
number of items after filtering 6
These are texts under https://www.afdb.org/en/about-us/organisational-structure/administrative-tribunal/
number of items grabed are 16
These are texts under https://www.afdb.org/en/countries/southern-africa/botswana/
number of items after filtering 10
number of items grabed are 17
number of items after filtering 3
These are texts under https://www.afdb.org/en/topics-and-sectors/sectors/transport/
number of items grabed are 9
number of items after filtering 3
These are texts under https://www.afdb.org/en/documents/financial-information/d

These are texts under https://www.afdb.org/en/countries/southern-africa/mozambique/
number of items grabed are 19
number of items after filtering 4
These are texts under https://www.afdb.org/en/about-us/frequently-asked-questions/
number of items grabed are 23
number of items after filtering 21
These are texts under https://www.afdb.org/en/projects-and-operations/financial-management/financial-management-services-contacts/
number of items grabed are 1
number of items after filtering 0
These are texts under https://www.afdb.org/en/sitemap/
number of items grabed are 0
number of items after filtering 0
These are texts under https://www.afdb.org/en/topics-and-sectors/initiatives-partnerships/african-peer-review-mechanism/
number of items grabed are 7
number of items after filtering 6
These are texts under https://www.afdb.org/en/countries/east-africa/
These are texts under https://www.afdb.org/en/about-us/mission-strategy/objectives/
number of items grabed are 7
number of items grabed are

number of items after filtering 3
These are texts under https://www.afdb.org/en/countries/southern-africa/sao-tome-principe/
number of items grabed are 22
number of items after filtering 5
These are texts under https://www.afdb.org/en/documents/environmental-social-assessments/
number of items grabed are 23
number of items after filtering 0
These are texts under https://www.afdb.org/en/news-and-events/loans-grants/
number of items grabed are 22
number of items after filtering 10
These are texts under https://www.afdb.org/en/topics-and-sectors/initiatives-partnerships/enhanced-private-sector-assistance-for-africa-epsa-initiative/
number of items grabed are 129
number of items after filtering 20
These are texts under https://www.afdb.org/en/about-us/organisational-structure/
number of items grabed are 2
number of items after filtering 1
www.afdb.org  and  www.youtube.com  may not be relevent
These are texts under https://www.afdb.org/en/knowledge/publications/africa-competitiveness-repor

number of items grabed are 3
number of items after filtering 0
These are texts under https://www.afdb.org/en/documents/project-operations/monthly-economic-review/
number of items grabed are 23
number of items after filtering 0
These are texts under https://www.afdb.org/en/topics-and-sectors/initiatives-partnerships/investment-climate-facility/
number of items grabed are 11
number of items after filtering 9
These are texts under https://www.afdb.org/en/documents/policy-documents/policies-on-cross-cutting-issues/
number of items grabed are 13
number of items after filtering 0
www.afdb.org  and  twitter.com  may not be relevent
These are texts under https://www.afdb.org/en/topics-and-sectors/
number of items grabed are 4
number of items after filtering 0
These are texts under https://www.afdb.org/en/news-and-events/success-stories/nairobi-addis-ababa-road-corridor-boosts-trade-in-east-and-horn-of-africa/
number of items grabed are 33
number of items after filtering 18
These are texts unde

number of items grabed are 22
number of items after filtering 11
These are texts under https://www.afdb.org/en/documents/project-related-procurement/procurement-statistics/
number of items grabed are 1
number of items after filtering 0
These are texts under https://www.afdb.org/en/documents/project-operations/country-performance-assessment-cpa/
number of items grabed are 3
number of items after filtering 0
These are texts under https://www.afdb.org/en/topics-and-sectors/sectors/infrastructure/
number of items grabed are 2
number of items after filtering 0
These are texts under https://www.afdb.org/en/documents/legal-documents/partnership-agreements/
number of items grabed are 22
number of items after filtering 0
These are texts under https://www.afdb.org/en/documents/evaluation-reports/evaluation-working-paper/
number of items grabed are 15
number of items after filtering 0
These are texts under https://www.afdb.org/en/documents/administrative-tribunal/statute/
number of items grabed a

These are texts under https://www.afdb.org/en/about-us/corporate-procurement/
number of items grabed are 2
number of items after filtering 1
These are texts under https://www.afdb.org/en/about-us/corporate-procurement/procurement-notices/current-solicitations/
number of items grabed are 3
number of items after filtering 2
These are texts under https://www.afdb.org/en/topics-and-sectors/initiatives-partnerships/health-in-africa-fund/
number of items grabed are 16
number of items after filtering 16
These are texts under https://www.afdb.org/en/documents/project-operations/projectprogramme-completion-reports/
number of items grabed are 22
number of items after filtering 1
These are texts under https://www.afdb.org/en/topics-and-sectors/topics/poverty-reduction/
number of items grabed are 7
number of items after filtering 4
These are texts under https://www.afdb.org/en/topics-and-sectors/initiatives-partnerships/sustainable-energy-fund-for-africa/
number of items grabed are 19
number of it

number of items grabed are 15
number of items after filtering 15
These are texts under https://www.afdb.org/en/topics-and-sectors/initiatives-partnerships/green-climate-fund/
number of items grabed are 17
number of items after filtering 10
These are texts under https://www.afdb.org/en/documents/project-operations/poverty-reduction-strategy-papers/
number of items grabed are 10
number of items after filtering 0
These are texts under https://www.afdb.org/en/blogs/economic-growth-human-and-social-development/
number of items grabed are 31
number of items after filtering 12
These are texts under https://www.afdb.org/en/topics-and-sectors/sectors/water-supply-sanitation/
number of items grabed are 6
number of items after filtering 2
These are texts under https://www.afdb.org/en/documents/project-operations/country-governance-profiles/
number of items grabed are 21
number of items after filtering 0
These are texts under https://www.afdb.org/en/topics-and-sectors/sectors/health/
number of ite

number of items grabed are 8
number of items after filtering 7
These are texts under https://www.afdb.org/en/topics-and-sectors/initiatives-partnerships/multi-partner-somalia-infrastructure-fund-sif/
number of items grabed are 23
number of items after filtering 8
These are texts under https://www.afdb.org/en/documents/policy-documents/
number of items grabed are 11
number of items after filtering 0
These are texts under https://www.afdb.org/en/documents/departmental-annual-reports/
number of items grabed are 10
number of items after filtering 0
These are texts under https://www.afdb.org/en/the-high-5/
number of items grabed are 12
number of items after filtering 7
These are texts under https://www.afdb.org/en/documents/project-operations/project-appraisal-reports/
number of items grabed are 22
number of items after filtering 0
These are texts under https://www.afdb.org/en/topics-and-sectors/topics/partnerships/
number of items grabed are 4
number of items after filtering 1
These are te

number of items grabed are 2
number of items after filtering 1
These are texts under https://www.afdb.org/en/topics-and-sectors/initiatives-partnerships/programme-for-infrastructure-development-in-africa-pida/
number of items grabed are 27
number of items after filtering 10
These are texts under https://www.afdb.org/en/documents/legal-documents/loan-and-grant-conditions/
number of items grabed are 1
number of items after filtering 0
These are texts under https://www.afdb.org/en/glossary/
number of items grabed are 0
number of items after filtering 0
These are texts under https://www.afdb.org/en/topics-and-sectors/initiatives-partnerships/africa-trade-fund/
number of items grabed are 7
number of items after filtering 3
These are texts under https://www.afdb.org/en/news-and-events/multimedia/
number of items grabed are 8
number of items after filtering 0
These are texts under https://www.afdb.org/en/documents/project-operations/
number of items grabed are 22
number of items after filteri

url: http://twitter.com/devfinancenews, invalid
www.cdcgroup.com  and  twitter.com  may not be relevent
These are texts under https://www.cdcgroup.com/en/our-investments/key-data/
number of items grabed are 273
number of items after filtering 13
url: https://www.linkedin.com/company/cdc-group-plc/ invalid
These are texts under https://www.cdcgroup.com/en/story/pristine-logistics/
number of items grabed are 49
number of items after filtering 24
try succeeded
www.cdcgroup.com  and  twitter.com  may not be relevent
These are texts under https://www.cdcgroup.com/en/news-insight/insight/
number of items grabed are 26
number of items after filtering 5
These are texts under https://www.cdcgroup.com/en/home/im-looking-to-grow-my-business/
number of items grabed are 67
number of items after filtering 9
www.cdcgroup.com  and  www.youtube.com  may not be relevent
These are texts under https://www.cdcgroup.com/en/news-insight/news/podcast-has-impact-investing-gone-mainstream/
number of items grabe

www.fmo.nl  and  twitter.com  may not be relevent
www.fmo.nl  and  www.youtube.com  may not be relevent
www.fmo.nl  and  www.facebook.com  may not be relevent
These are texts under https://www.fmo.nl/news-detail/5b8c734c-e00f-4bc0-b067-1984269071f7/magazine-p-special-fmo-helps-small-scale-coastal-fishermen-preserve-their-catch
number of items grabed are 9
number of items after filtering 6
These are texts under https://www.fmo.nl/stakeholder-engagement
number of items grabed are 12
number of items after filtering 9
These are texts under https://www.fmo.nl/infrastructure-development-fund
number of items grabed are 12
number of items after filtering 5
url: https://nl.linkedin.com/company/fmo invalid
These are texts under https://www.fmo.nl/events-detail/1da1a4f7-2aed-4a73-af46-d20ec900532b/fmo-supports-the-world-s-first-global-gender-smart-investing-summit
These are texts under https://www.fmo.nl/news-detail/24249958-8c23-44a8-a087-1690b2642986/gcf-strengthens-partnership-with-fmo-to-comb

number of items grabed are 34
number of items after filtering 21
number of items grabed are 15
number of items after filtering 5
These are texts under https://www.norfund.no/
number of items grabed are 12
number of items after filtering 0
These are texts under https://norfund.no/investmentdetails/fotovoltaica-los-prados-sa-article12301-1042.html
number of items grabed are 17
number of items after filtering 6
These are texts under https://www.norfund.no/sme-funds/category1047.html
number of items grabed are 14
number of items after filtering 6
These are texts under https://norfund.no/vacant-positions/category1061.html
number of items grabed are 7
number of items after filtering 0
www.norfund.no  and  www.coretrek.no  may not be relevent
These are texts under https://www.norfund.no/clean-energy/category1049.html
number of items grabed are 32
number of items after filtering 14
These are texts under https://www.norfund.no/videos-and-stories/norfund-20-years-of-investing-for-development-art

number of items grabed are 27
number of items after filtering 9
starting to crawl main url:  https://www.sifem.ch
main_url:  https://www.sifem.ch  is not valid
starting to crawl main url:  https://www.ifc.org


these are the children links we crawled
['https://www.ifc.org'] 

starting to crawl main url:  https://www.credit-suisse
main_url:  https://www.credit-suisse  is not valid
starting to crawl main url:  https://www.jpmorganchase.
main_url:  https://www.jpmorganchase.  is not valid
starting to crawl main url:  https://www.gatesfoundation.org


these are the children links we crawled
['/How-We-Work/General-Information/Grant-Seeking-Resources', '/What-We-Do/Global-Development/Emergency-Response', '/How-We-Work/Resources/Grantee-Profiles', '/How-We-Work/General-Information/Open-Access-Policy', '/What-We-Do/Global-Growth-and-Opportunity/Financial-Services-for-the-Poor', 'https://discovergates.org/', '/What-We-Do/Global-Growth-and-Opportunity/Water-Sanitation-and-Hygiene', '/What-We-Do/

number of items grabed are 13
number of items after filtering 6
These are texts under https://www.gatesfoundation.org/Get-Involved
These are texts under https://www.gatesfoundation.org/Who-We-Are/General-Information/History
number of items grabed are 15
number of items after filtering 4
number of items grabed are 74
number of items after filtering 41
These are texts under https://www.gatesfoundation.org/What-We-Do/Global-Policy/Global-Education-Learning-Strategy
number of items grabed are 22
number of items after filtering 13
These are texts under https://www.gatesfoundation.org/Search
number of items grabed are 8
number of items after filtering 2
These are texts under https://www.gatesfoundation.org/What-We-Do/Global-Development/Nutrition
number of items grabed are 38
number of items after filtering 33
These are texts under https://www.gatesfoundation.org/Where-We-Work/Africa-Office
number of items grabed are 10
number of items after filtering 5
www.gatesfoundation.org  and  www.impat

url: https://www.linkedin.com/company/the-doen-foundation/ invalid
www.doen.nl  and  view.publitas.com  may not be relevent
url: https://www.doen.nl/web/home-1.htm/doens-network.htm invalid
url: https://www.doen.nl/web/home-1.htm/home.htm invalid
url: https://www.doen.nl/web/home-1.htm/applications/criteria.htm invalid
url: https://www.doen.nl/web/home-1.htm/doen-participaties-en/about.htm invalid
url: https://www.doen.nl/web/home-1.htm/doen-participaties-en/portfolio.htm invalid
url: https://www.doen.nl/web/home-1.htm/contact-us-1.htm invalid
url: https://www.doen.nl/web/home-1.htm/home-1.htm invalid
url: https://www.doen.nl/web/home-1.htm/netwerkkaart.htm invalid
url: https://www.doen.nl/web/home-1.htm/about-doen/annual-report.htm invalid
url: https://www.doen.nl/web/home-1.htm/doen-participaties-en/mission.htm invalid
These are texts under https://www.doen.nl/my-doen/applications-1.htm
number of items grabed are 0
number of items after filtering 0
url: https://www.doen.nl/web/home-1

url: http://www.linkedin.com/company/22806 invalid
www.omidyar.com  and  twitter.com  may not be relevent
www.omidyar.com  and  twitter.com  may not be relevent
www.omidyar.com  and  twitter.com  may not be relevent
www.omidyar.com  and  twitter.com  may not be relevent
These are texts under https://www.omidyargroup.com/
number of items grabed are 9
number of items after filtering 3
www.omidyar.com  and  www.facebook.com  may not be relevent
These are texts under https://www.omidyar.com/privacy-policy
number of items grabed are 115
number of items after filtering 44
These are texts under https://www.omidyar.com/our-work
number of items grabed are 9
number of items after filtering 9
www.omidyar.com  and  twitter.com  may not be relevent
These are texts under https://www.omidyar.com/investment-approach
number of items grabed are 11
number of items after filtering 11
These are texts under https://www.omidyar.com/offices/washington-dc
number of items grabed are 1
number of items after filt

These are texts under https://www.rockefellerfoundation.org/our-work/bellagio-center/conferences/
number of items grabed are 19
number of items after filtering 12
These are texts under https://www.rockefellerfoundation.org/about-us/news-media/
number of items grabed are 16
number of items after filtering 7
These are texts under https://www.rockefellerfoundation.org/our-work/topics/health/
These are texts under https://www.rockefellerfoundation.org/our-work/bellagio-center/residency-program/
number of items grabed are 19
number of items grabed are 25
number of items after filtering 13
number of items after filtering 15
These are texts under https://www.rockefellerfoundation.org/blog/
These are texts under https://www.rockefellerfoundation.org/about-us/governance-reports/
number of items grabed are 23
number of items after filtering 6
These are texts under https://www.rockefellerfoundation.org/our-work/topics/food/
number of items grabed are 14
number of items after filtering 6
number of

tonyelumelufoundation.org  and  twitter.com  may not be relevent
tonyelumelufoundation.org  and  twitter.com  may not be relevent
tonyelumelufoundation.org  and  twitter.com  may not be relevent
tonyelumelufoundation.org  and  twitter.com  may not be relevent
tonyelumelufoundation.org  and  twitter.com  may not be relevent
tonyelumelufoundation.org  and  twitter.com  may not be relevent
tonyelumelufoundation.org  and  twitter.com  may not be relevent
tonyelumelufoundation.org  and  twitter.com  may not be relevent
tonyelumelufoundation.org  and  twitter.com  may not be relevent
tonyelumelufoundation.org  and  twitter.com  may not be relevent
tonyelumelufoundation.org  and  twitter.com  may not be relevent
tonyelumelufoundation.org  and  twitter.com  may not be relevent
These are texts under http://www.tonyelumelufoundation.org/programmes/current-programme/
number of items grabed are 1
number of items after filtering 1
tonyelumelufoundation.org  and  twitter.com  may not be relevent
ton

url: https://www.abraaj.commailto: ABRAAJ@deloitte.com invalid
These are texts under https://www.abraaj.com/insights/news/press-releases/abraaj-submits-application-court-supervised-restructuring-facilitate-acquisition-process-protect-interests-creditors/
These are texts under https://www.abraaj.com/insights/blogs/insightsblogsmeet-myriam-ben-salah-curator-10th-edition-abraaj-group-art-prize/
number of items grabed are 3
These are texts under https://www.abraaj.com/the-firm/abraaj-values
number of items grabed are 15
number of items after filtering 1
number of items grabed are 9
These are texts under https://www.abraaj.com/insights/news/press-releases/the-abraaj-group-acquires-a-stake-in-indorama-fertilizers
These are texts under https://www.abraaj.com/insights/news/press-releases/the-abraaj-group-divests-its-stake-in-careem-to-kingdom-holding-company/
number of items after filtering 9
number of items grabed are 8
number of items grabed are 9
These are texts under https://www.abraaj.com

number of items grabed are 6
number of items after filtering 3
www.abraaj.com  and  twitter.com  may not be relevent
These are texts under https://www.abraaj.com/business/private-equity
number of items grabed are 15
number of items after filtering 10
These are texts under https://www.abraaj.com/insights/news/press-releases
number of items grabed are 479
These are texts under https://www.abraaj.com/talent/work-with-us/
These are texts under https://www.abraaj.com/business/impact-investing/
number of items grabed are 5
number of items grabed are 12
number of items after filtering 5
number of items after filtering 6
number of items after filtering 313
These are texts under https://www.abraaj.com/A-Statement-from-The-Abraaj-Group
These are texts under https://www.abraaj.com/insights/videos
number of items grabed are 2
number of items after filtering 2
number of items grabed are 284
number of items after filtering 187
These are texts under https://www.abraaj.com/investments/partner-companie

main_url:  https://www.acumenfund.org  is not valid
starting to crawl main url:  https://www.agrivie.com


these are the children links we crawled
['https://agrivie.com/media-center/perspectives/', 'https://agrivie.com/investment-approach/investment-period/', 'http://agrivie.com/investors-list/#investor-186', 'https://agrivie.com/investment-approach/', 'https://agrivie.com/investment-approach/partnership-commitment/', 'https://agrivie.com/media-center/news/', 'http://agrivie.com/investors-list/#investor-193', 'http://agrivie.com/investments/', 'https://agrivie.com/', 'http://agrivie.com/investors-list/#investor-204', 'https://agrivie.com/media-center/press-room/', 'http://agrivie.com/investors-list/#investor-205', 'https://www.agrivie.com', 'http://agrivie.com/investors-list/#investor-187', 'http://agrivie.com/investors-list/#investor-195', 'https://agrivie.com/investment-approach/investment-focus/', 'https://agrivie.com/investment-approach/investment-process/', 'http://agrivie.com/inv

url: http://unashamedlyethical.com/Home/default.asp invalid
www.ariyacapital.com  and  walvisbaypowerplant.com  may not be relevent
www.ariyacapital.com  and  www.duke-energy.com  may not be relevent
www.ariyacapital.com  and  sps.africa  may not be relevent
url: https://www.ariyacapital.commailto:info@mk-africa.com invalid
url: http://www.businessdailyafrica.com/Four-Kenyan-firms-picked-for-Sh1-3bn-green-energy-fund/-/539552/3028296/-/12qqi5bz/-/index.html invalid
starting to crawl main url:  https://www.bamboofinance.com
main_url:  https://www.bamboofinance.com  is not valid
starting to crawl main url:  https://www.blueorchard.com


these are the children links we crawled
['https://www.blueorchard.com/unctad-world-investment-forum-2/', 'https://www.blueorchard.com/category/news/news-releases/', 'https://www.blueorchard.com/investment-solutions/investment-fund/', 'https://www.blueorchard.com/mikrofinanz-eine-der-interessantesten-finanzdienstleistungen/', 'https://www.blueorchard.com/p

url: https://www.blueorchard.commailto:singapore@blueorchard.com invalid
These are texts under https://www.blueorchard.com/about-us/careers/
number of items grabed are 32
number of items after filtering 6
These are texts under https://www.blueorchard.com/
number of items grabed are 47
number of items after filtering 9
These are texts under https://www.blueorchard.com/investment-solutions/blended-finance-mandates/iif/
number of items grabed are 46
number of items after filtering 14
These are texts under https://www.blueorchard.com/investment-solutions/registration-form/
number of items grabed are 38
number of items after filtering 12
These are texts under https://www.blueorchard.com/sustainable-investing-emerging-markets-summit/
number of items grabed are 20
number of items after filtering 3
These are texts under https://www.blueorchard.com/investment-solutions/blended-finance-mandates/fighting-poverty-striving-women-empowerment/
number of items grabed are 43
number of items after filte

number of items after filtering 8
These are texts under https://www.blueorchard.com/investment-solutions/blueorchard-bond-fund/
number of items grabed are 57
number of items after filtering 15
These are texts under https://www.blueorchard.com/about-us/team/
number of items grabed are 75
number of items after filtering 44
These are texts under https://www.blueorchard.com/category/news/
number of items grabed are 41
number of items after filtering 13
starting to crawl main url:  https://www.catalystprincipal.com


these are the children links we crawled
['https://www.catalystprincipal.com/', 'https://www.catalystprincipal.com/burbridge-newsletter-interview-with-biniam-yohannes/', 'https://www.catalystprincipal.com/kenyan-pe-firm-raises-103mn-in-second-round-funding/', 'https://www.catalystprincipal.com/sustainability/', 'https://www.catalystprincipal.com/catalyst-acquires-britania-biscuits-jambo-foods/', 'https://www.catalystprincipal.com/catalyst-principal-partners-exits-investment-in-g

url: https://www.linkedin.com/company/grofin invalid
These are texts under http://www.grofin.com/language/en/business_support/#businesssupportguides
These are texts under http://www.grofin.com/language/en/business_support/#section1_supportbeyondfinance
These are texts under http://www.grofin.com/language/en/about_grofin/#section8_ourpeople
number of items grabed are 26
number of items grabed are 26
These are texts under http://www.grofin.com/language/en/business_support/#section2_howweworkwithsmes
number of items after filtering 16
number of items after filtering 16
number of items grabed are 40
These are texts under http://www.grofin.com/language/en/about_grofin/#section7_ourinvestorsandfunders
These are texts under http://www.grofin.com/language/en/about_grofin/#section5_thegrofinmodel
These are texts under http://www.grofin.com/language/en/about_grofin/#section4_ourobjective
number of items grabed are 26
number of items after filtering 23
number of items after filtering 16
number of

These are texts under https://www.heartcapital.co.za/get-involved
www.heartcapital.co.za  and  www.instagram.com  may not be relevent
number of items grabed are 41
number of items after filtering 7
www.heartcapital.co.za  and  www.facebook.com  may not be relevent
These are texts under https://www.heartcapital.co.za/legacy-gallery
number of items grabed are 163
number of items after filtering 74
starting to crawl main url:  https://www.ignite-fund.org
main_url:  https://www.ignite-fund.org  is not valid
starting to crawl main url:  https://www.iachl.com


these are the children links we crawled
['parent.php?root=portfolio&link=profiles', 'http://www.cdcgroup.com', 'parent.php?root=approach&link=strategy', 'parent.php?root=clients/', 'parent.php?root=about&link=team', 'parent.php?root=resources&link=news', 'parent.php?root=about&link=partners', 'parent.php?root=join&link=investors', 'http://www.tagedstudio.com', 'parent.php?root=join&link=experts', 'parent.php?root=apps/', 'http://www.a

www.mergence.co.za  and  twitter.com  may not be relevent
url: http://www.mergence.co.za/ideas/?tab=6 invalid
These are texts under http://www.mergence.co.za/
number of items grabed are 3
url: http://www.mergence.co.za/ideas/?tab=5 invalid
number of items after filtering 2
These are texts under http://www.mergence.co.za/convictions/impact-investing/uplifting-communities/
number of items grabed are 2
number of items after filtering 1
url: http://# invalid
url: https://www.linkedin.com/company/mergence-investment-managers/edit invalid
These are texts under http://www.mergence.co.za/legal/
number of items grabed are 30
number of items after filtering 21
www.mergence.co.za  and  www.youtube.com  may not be relevent
These are texts under http://www.mergence.co.za/legal/?tab=1
number of items grabed are 30
number of items after filtering 21
These are texts under https://www.mergence.co.za/expertise/
number of items grabed are 4
number of items after filtering 1
These are texts under https://

www.moringapartnership.com  and  www.linkedin.com  may not be relevent
www.moringapartnership.com  and  twitter.com  may not be relevent
www.moringapartnership.com  and  www.facebook.com  may not be relevent
These are texts under https://www.moringapartnership.com/agroforestry/
number of items grabed are 14
number of items after filtering 4
These are texts under https://www.moringapartnership.com/targets/
number of items grabed are 21
number of items after filtering 4
These are texts under https://www.moringapartnership.com/careers/
number of items grabed are 11
number of items after filtering 1
These are texts under https://www.moringapartnership.com/sustainability/
number of items grabed are 22
number of items after filtering 7
These are texts under https://www.moringapartnership.com/jus-delice/
number of items grabed are 15
number of items after filtering 5
These are texts under https://www.moringapartnership.com/vision/
number of items grabed are 13
number of items after filtering 

number of items grabed are 6
number of items after filtering 1
These are texts under https://www.oikocredit.coop/updates/newsletter
number of items grabed are 1
number of items after filtering 0
These are texts under https://www.oikocredit.coop/funding/where-we-work/regional-offices-in-africa
number of items grabed are 4
number of items after filtering 2
These are texts under https://www.oikocredit.coop/funding/where-we-work/regional-offices-in-latin-america
number of items grabed are 7
number of items after filtering 3
These are texts under https://www.oikocredit.coop/what-we-do/social-return/measuring-social-performance
number of items grabed are 8
number of items after filtering 7
These are texts under https://www.oikocredit.coop/updates/updates
number of items grabed are 2
number of items after filtering 0
These are texts under https://www.oikocredit.coop/what-we-do/social-return/driving-social-agenda
number of items grabed are 19
number of items after filtering 7
try succeeded
The

These are texts under https://www.phatisa.com/pe-news-app/
number of items grabed are 17
number of items after filtering 7
These are texts under https://www.phatisa.com/contact/
number of items grabed are 8
number of items after filtering 4
These are texts under https://www.phatisa.com/investor-portal/pahf-investor-portal/
number of items grabed are 8
number of items after filtering 5
These are texts under https://www.phatisa.com/portfolio/pahf-portfolio/
number of items grabed are 3
number of items after filtering 2
These are texts under https://www.phatisa.com/funds/pahf/
number of items grabed are 10
number of items after filtering 8
url: https://www.linkedin.com/company/phatisa invalid
These are texts under http://www.phatisaapp.com/click2sure-leve-des-fonds-aupres-de-la-societe-dassurance-greenlight-capital-re-incl-translation/
number of items grabed are 11
number of items after filtering 8
www.phatisa.com  and  play.google.com  may not be relevent
These are texts under http://www

url: https://www.linkedin.com/company/1522762 invalid
www.responsability.com  and  twitter.com  may not be relevent
These are texts under https://www.responsability.com/en/sectors
number of items grabed are 20
number of items after filtering 2
These are texts under https://www.responsability.com/en/private-equity
number of items grabed are 20
number of items after filtering 1
These are texts under https://www.responsability.com/en/about/portrait-figures
number of items grabed are 19
number of items after filtering 1
These are texts under https://www.responsability.com/en/about
number of items grabed are 17
number of items after filtering 0
These are texts under https://www.responsability.com/en/smallholder-farmers-increase-their-income-50-100
number of items grabed are 49
number of items after filtering 19
These are texts under https://www.responsability.com/en/capital-financing-0
These are texts under https://www.responsability.com/en/market-reactions-rising-interest-rates
number of i

number of items grabed are 15
number of items after filtering 6
These are texts under https://symbioticsgroup.com/governance/
number of items grabed are 20
number of items after filtering 8
try succeeded
www.symbioticsgroup.com  and  www.luxflag.org  may not be relevent
These are texts under https://symbioticsgroup.com/asset-management/
number of items grabed are 64
number of items after filtering 25
These are texts under https://symbioticsgroup.com/capacity-building/
number of items grabed are 34
number of items after filtering 10
These are texts under https://symbioticsgroup.com/news/symbiotics-at-the-social-good-summit-october-23rd/
number of items grabed are 13
number of items after filtering 7
These are texts under https://symbioticsgroup.com/team/
number of items grabed are 16
number of items after filtering 5
These are texts under https://symbioticsgroup.com/terms-of-use/
number of items grabed are 6
number of items after filtering 3
These are texts under https://symbioticsgroup

url: https://www.triodos.com/en/investment-management/customize-privacy-settings invalid
url: https://www.triodos.com/en/investment-management/business-development-and-investor-relations-team invalid
url: https://www.triodos.com/en/investment-management/socially-responsible-investing invalid
These are texts under https://www.triodos-im.com/articles/2018/how-achieve-impact-by-investing-in-listed-companies
number of items grabed are 35
number of items after filtering 25
url: https://www.triodos.com/en/investment-management/inclusive-finance invalid
url: https://www.triodos.com/en/investment-management{{articleUrl}} invalid
url: https://www.triodos.com/en/investment-management/funds/triodos-groenfonds invalid
url: https://www.triodos.com/en/investment-management/sustainable-food-and-agriculture invalid
www.triodos.com  and  triodosimpactreports.com  may not be relevent
www.triodos.com  and  triodosimpactreports.com  may not be relevent
url: https://www.triodos.com/en/investment-management

url: http://www.vcexperts.com invalid
These are texts under http://www.uvcmc.com/webmail
number of items grabed are 0
number of items after filtering 0
url: http://allafrica.com/stories/200901230177.html invalid
starting to crawl main url:  https://www.vital-capital.com


these are the children links we crawled
['https://vital-capital.com/vital-news/', 'https://vital-capital.com/category/team-members/', 'https://vital-capital.com/category/projects/', 'https://vital-capital.com/contact/', 'https://vital-capital.com/building-vitals-communities/', 'http://www.linkedin.com/company/vital-capital', 'https://vital-capital.com/geography/', 'https://vital-capital.com/about/', 'https://vital-capital.com/impactinvesting/', 'https://vital-capital.com/media-files/', 'https://vital-capital.com/collaborations-2/', 'https://www.vital-capital.com', 'https://www.facebook.com/VitalCapital', 'https://vital-capital.com', 'https://vital-capital.com/investors-relations/', 'https://twitter.com/Vital_Capital']

number of items grabed are 21
number of items after filtering 17
These are texts under https://www.bidnetwork.org/services/investors/investor-trip/
number of items grabed are 42
number of items after filtering 23
These are texts under https://www.bidnetwork.org/insights/
number of items grabed are 2
number of items after filtering 0
These are texts under https://www.bidnetwork.org/contact/
number of items grabed are 9
number of items after filtering 2
These are texts under https://www.bidnetwork.org/services/investors/ready-2-invest-master-classes/
number of items grabed are 9
number of items after filtering 5
These are texts under https://www.bidnetwork.org/blog/u-s-government-launches-nguriza-nshore-agricultural-finance-project/
number of items grabed are 10
number of items after filtering 4
These are texts under https://www.bidnetwork.org/about/
number of items grabed are 7
number of items after filtering 4
These are texts under https://www.bidnetwork.org/services/entrepreneurs/sele

These are texts under https://www.dalberg.com/our-ideas/walking-talk
number of items grabed are 4
number of items after filtering 2
These are texts under https://www.dalberg.com/our-ideas/financing-cataract-solutions-developing-world
number of items grabed are 18
number of items after filtering 11
These are texts under https://www.dalberg.com/what-we-do/dalberg-research
number of items grabed are 24
number of items after filtering 15
These are texts under https://dalberg.com/impact-investing-advisory
number of items grabed are 9
number of items after filtering 5
These are texts under https://www.dalberg.com/explore-snapshot-our-experience?area=environment
number of items grabed are 2
number of items after filtering 1
These are texts under https://www.dalberg.com/upcoming-events
These are texts under https://www.dalberg.com/who-we-are
number of items grabed are 9
number of items grabed are 1
number of items after filtering 2
number of items after filtering 0
These are texts under https:

url: mailto:?subject=OnlinePajak%20Raises%20%2425%20Million%20Series%20B%20with%20Participation%20from%20Endeavor%20Catalyst&body=OnlinePajak%2C%20the%20largest%20provider%20of%20tax%20compliance%20solutions%20in%20Indonesia%2C%20has%20raised%20over%20%2425%20million%20in%20Series%20B%20%26%238230%3B%0A%20https%3A%2F%2Fendeavor.org%2Fcatalyst%2Fonlinepajak-raises-25-million-series-b-participation-endeavor-catalyst%2F invalid
url: https://www.endeavor.orgjavascript:void(0); invalid
url: mailto:?subject=Endeavor%20Entrepreneurs%20Lead%20in%20Forbes%20Top%20100%20Startups%20in%20the%20Middle%20East%20Ranking&body=Fetchr%2C%20a%20Dubai-based%20delivery%20logistic%20company%20led%20by%20Endeavor%20Entrepreneur%20Idriss%20Al%20Rifai%2C%20ranked%20as%20number%20one%20on%20%26%238230%3B%0A%20https%3A%2F%2Fendeavor.org%2Fin-the-news%2Fendeavor-entrepreneurs-lead-forbes-top-100-startups-middle-east-ranking%2F invalid
url: http://briteweb.com invalid
url: https://www.linkedin.com/company/endeavor

url: https://www.impactamplifier.co.za/contact-us/ invalid
www.impactamplifier.co.za  and  us15.list-manage.com  may not be relevent
url: https://www.impactamplifier.co.za/fellowship-opportunities/ invalid
url: https://www.impactamplifier.co.za/access-to-impact-investment-capital/ invalid
url: https://www.impactamplifier.co.za/our-insights/reports/ invalid
url: https://www.impactamplifier.co.za/creating-ethical-supply-chains/ invalid
url: https://www.impactamplifier.co.za/entrepreneur-investment-and-ecosystem-research/ invalid
url: https://www.impactamplifier.co.za/team-partners/ invalid
url: https://www.impactamplifier.co.za/news/whats-news-sab-foundation-investment-readiness-accelerator-innovation-fund/ invalid
url: https://www.impactamplifier.co.za invalid
url: https://www.impactamplifier.co.za/news/google-impact-challenge-south-africa/ invalid
url: https://www.impactamplifier.co.za/acceleration-business-development-training/ invalid
www.impactamplifier.co.za  and  twitter.com  may 

number of items after filtering 2
These are texts under https://shellfoundation.org/people/
number of items grabed are 9
number of items after filtering 6
These are texts under https://shellfoundation.org/contact/
number of items grabed are 8
number of items after filtering 2
These are texts under https://shellfoundation.org/news/sure-chill-wins-barclays-innovation-award/
number of items grabed are 12
number of items after filtering 7
These are texts under https://shellfoundation.org/about/
number of items grabed are 9
number of items after filtering 5
These are texts under https://shellfoundation.org/cookie-policy/
number of items grabed are 25
number of items after filtering 13
These are texts under https://shellfoundation.org/trustees/
number of items grabed are 10
number of items after filtering 7
These are texts under https://shellfoundation.org/portfolio/globology/
number of items grabed are 5
number of items after filtering 2
These are texts under https://shellfoundation.org/por

number of items after filtering 14
These are texts under http://www.gsb.uct.ac.za/impact-investing
number of items grabed are 13
number of items after filtering 10
www.gsb.uct.ac.za  and  www.facebook.com  may not be relevent
These are texts under http://www.gsbbusinessreview.gsb.uct.ac.za/
number of items grabed are 3
number of items after filtering 1
These are texts under http://gsbblogs.uct.ac.za/berthacentre/
number of items grabed are 101
number of items after filtering 48
starting to crawl main url:  https://www.lbs.edu.ng


these are the children links we crawled
['https://www.lbs.edu.ng/lbsinsight/internationalisation-by-african-firms/', 'https://www.lbs.edu.ng/about-lbs/leadership-and-governance/', 'javascript:;', 'https://www.lbs.edu.ng/lagos-business-schools-dr-uchenna-uzo-wins-best-teaching-case-award-at-aib-conference/', 'https://www.lbs.edu.ng/privacy', 'https://www.lbs.edu.ng/faculty-and-research/research-news/', 'https://www.lbs.edu.ng/lbsinsight/strategy-execution-chal

number of items after filtering 23
These are texts under https://www.lbs.edu.ng/faculty-and-research/faculty-directory/
number of items grabed are 25
number of items after filtering 0
These are texts under https://www.lbs.edu.ng/lbs-insight/
number of items grabed are 30
number of items after filtering 0
www.lbs.edu.ng  and  www.youtube.com  may not be relevent
These are texts under https://www.lbs.edu.ng/faculty-and-research/academic-areas-and-departments/
number of items grabed are 25
number of items after filtering 0
These are texts under https://www.lbs.edu.ng/about-lbs/deans-message/
number of items grabed are 34
number of items after filtering 9
These are texts under https://www.lbs.edu.ng/about-lbs/careers-at-lbs/
number of items grabed are 39
number of items after filtering 11
These are texts under https://www.lbs.edu.ng/lbsinsight/navigating-the-complexities-in-logistics-and-supply-chain-management-in-the-african-market/
number of items grabed are 20
number of items after filt

www.sbs.strathmore.edu  and  twitter.com  may not be relevent
www.sbs.strathmore.edu  and  www.facebook.com  may not be relevent
url: https://www.sbs.strathmore.edueventcalendar invalid
www.sbs.strathmore.edu  and  www.wpdownloadmanager.com  may not be relevent
www.sbs.strathmore.edu  and  elearning.sbs.ac.ke  may not be relevent
These are texts under https://sbs.strathmore.edu/about-sbs/advancement/
number of items grabed are 21
number of items after filtering 11
These are texts under http://sbs.strathmore.edu/resources/sbs-facilities/
number of items grabed are 5
number of items after filtering 1
These are texts under https://sbs.strathmore.edu/about-sbs/memberships/
number of items grabed are 8
number of items after filtering 5
These are texts under https://sbs.strathmore.edu/executive-education/executive-talent-development/regional-academies/rwanda-leadership-development-academy/women-leadership-program-rwanda/
number of items grabed are 22
number of items after filtering 5
These a

url: https://sbs.strathmore.edu/executive-education/executive-talent-development/senior-management-programs/the-master-negotiator/ invalid
url: https://sbs.strathmore.edu/centers/center-for-sustainability-leadership/measuring-social-impact/ invalid
url: http://www.strathmore.edu/en/about-strathmore/policies invalid
url: https://sbs.strathmore.edu/executive-education/executive-talent-development/regional-academies/rwanda-leadership-development-academy/new-managers-leadership-program-rwanda/ invalid
url: http://www.iese.edu/ invalid
url: https://sbs.strathmore.edu/executive-education/public-policy-programs/public-private-partnership-executive-program/ invalid
url: https://sbs.strathmore.edu/academic-programs/master-of-management-in-agribusiness/ invalid
url: https://sbs.strathmore.edu/executive-education/executive-talent-development/professional-development-programmes/personal-financial-management-and-investment-decisions-programme/ invalid
url: https://sbs.strathmore.edu/our-programs/pr

url: https://sbs.strathmore.edu/executive-education/executive-talent-development/professional-development-programmes/ invalid
url: https://sbs.strathmore.edu/executive-education/executive-coaching-2/ invalid
url: https://sbs.strathmore.edu/centers/center-for-sustainability-leadership/community-economic-development-executive-program/ invalid
www.sbs.strathmore.edu  and  www.youtube.com  may not be relevent
url: http://sbs.strathmore.edu/eventcalendar/alumni-knowledge-session-go-find-be-found-make-your-digital-footprint-work-for-you/ invalid
url: https://sbs.strathmore.edu/executive-education/executive-talent-development/c-suite-level-programs/ invalid
try succeeded
www.sbs.strathmore.edu  and  www.gibs.co.za  may not be relevent
url: https://sbs.strathmore.edu/executive-education/executive-coaching-2/leadership-management-coaching/ invalid
url: https://sbs.strathmore.edu/executive-education/executive-talent-development/regional-academies/uganda-leadership-development-academy/senior-mana

These are texts under https://www.aspeninstitute.org/about/
number of items grabed are 63
number of items after filtering 22
www.aspeninstitute.org  and  www.buffalo.edu  may not be relevent
www.aspeninstitute.org  and  www.flickr.com  may not be relevent
www.aspeninstitute.org  and  www.instagram.com  may not be relevent
www.aspeninstitute.org  and  www.aspenideas.org  may not be relevent
These are texts under https://www.aspeninstitute.org/programs/aspen-wye-fellows/
number of items grabed are 28
number of items after filtering 7
These are texts under https://agln.aspeninstitute.org/home
number of items grabed are 14
number of items after filtering 2
These are texts under https://www.aspeninstitute.org/issues/energy-environment/
number of items grabed are 45
number of items after filtering 0
These are texts under https://www.aspeninstitute.org/page/2/
number of items grabed are 39
number of items after filtering 0
www.aspeninstitute.org  and  www.technologyreview.com  may not be rele

url: https://www.thegiin.orggiin/supporters invalid
url: https://www.thegiin.orggiin-research invalid
url: https://www.thegiin.orggiin-membership invalid
url: https://www.thegiin.orgimpact-investing/need-to-know/#how-do-impact-investments-perform-financially invalid
url: https://www.thegiin.orgtraining/ invalid
url: https://www.thegiin.orgresearch/publication/annualsurvey2018 invalid
url: https://www.thegiin.orgresearch/publication/financing-sdgs invalid
url: https://www.thegiin.orgresearch/publication/financial-performance invalid
url: https://www.thegiin.orgtools/ invalid
url: https://www.thegiin.orgimpact-investing/ invalid
url: https://www.thegiin.orgimpactbase invalid
url: https://www.thegiin.orgresearch invalid
url: https://www.thegiin.orgabout/ invalid
url: https://www.thegiin.orgmembership/ invalid
url: http://www.linkedin.com/company/the-global-impact-investing-network invalid
url: https://www.thegiin.orgresearch-and-opinions/ invalid
url: https://www.thegiin.orgcase-study/rep

url: https://b-analytics.net/giirs-ratings/contact invalid
url: https://b-analytics.net/giirs-ratings/content/site-map invalid
url: https://b-analytics.net/giirs-ratings/content/giirs-rating-process invalid
url: https://b-analytics.net/giirs-ratings/content/about-us invalid
url: https://b-analytics.net/giirs-ratings/improve invalid
url: https://b-analytics.net/giirs-ratings/partners invalid
url: https://b-analytics.net/giirs-ratings/giirs-ratings/pricing invalid
url: https://b-analytics.net/giirs-ratings/assess invalid
url: https://b-analytics.net/giirs-ratings/giirs-funds invalid
url: https://b-analytics.net/giirs-ratings/content/privacy-policy invalid
url: https://b-analytics.net/giirs-ratings/articles/sign-b-analytics-trial invalid
url: https://b-analytics.net/giirs-ratings/content/our-history invalid
url: https://b-analytics.net/giirs-ratings/content/non-profit-behind-b-analytics invalid
b-analytics.net  and  bcorporation.force.com  may not be relevent
url: https://b-analytics.net/

number of items grabed are 5
number of items after filtering 1
These are texts under https://cleancookstoves.org/technology-and-fuels/standards/voluntary-performance-targets.html
number of items grabed are 12
number of items after filtering 3
These are texts under https://cleancookstoves.org/impact-areas/women/
number of items grabed are 8
number of items after filtering 3
url: https://www.linkedin.com/company/global-alliance-for-clean-cookstoves invalid
These are texts under https://cleancookstoves.org/research-and-evaluation/measuring-progress/monitoring.html
number of items grabed are 2
number of items after filtering 0
cleancookstoves.org  and  hapit.shinyapps.io  may not be relevent
These are texts under https://cleancookstoves.org/market-development/supply-strengthening/our-portfolio.html
number of items grabed are 4
number of items after filtering 2
These are texts under https://cleancookstoves.org/market-development/supply-strengthening/our-funds.html
number of items grabed are

number of items grabed are 7
number of items after filtering 5
url: https://cleancookstoves.org/about/news/10-16-2018-five-things-to-know-about-iso-s-new-clean-cooking-performance-targets.html invalid
These are texts under https://cleancookstoves.org/events/
number of items grabed are 2
number of items after filtering 0
These are texts under https://cleancookstoves.org/research-and-evaluation/technology-and-fuels/
number of items grabed are 12
number of items after filtering 6
These are texts under https://cleancookstoves.org/research-and-evaluation/impact-area-research/
number of items grabed are 11
number of items after filtering 1
These are texts under https://cleancookstoves.org/impact-areas/
number of items grabed are 5
number of items after filtering 0
starting to crawl main url:  https://www.drkfoundation.org


these are the children links we crawled
['https://www.twitter.com/BayArea2019', 'https://www.twitter.com/laughsee', 'https://www.twitter.com/jimbildner', 'https://www.drk

number of items grabed are 4
number of items after filtering 1
These are texts under https://www.drkfoundation.org/apply-for-funding/
number of items grabed are 5
number of items after filtering 3
These are texts under https://www.drkfoundation.org/news-post/it-takes-consultation-to-help-a-village/
number of items grabed are 4
number of items after filtering 1
These are texts under https://www.drkfoundation.org/impact/
number of items grabed are 9
number of items after filtering 1
These are texts under https://www.drkfoundation.org/news-post/undocumented-children-become-part-of-foster-care-system/
number of items grabed are 4
number of items after filtering 1


In [11]:
database[database=="Main site not accessible"].count()

Organisation     0
Website          0
Crawled         44
dtype: int64

In [50]:
"""
Drop unaccessable organizations and weird cols
"""
database = pd.read_csv("crawled_database.csv")
database = database[database.Crawled != "Main site not accessible"]
database = database.dropna()
database = database.drop("Unnamed: 0", axis=1)
database = database.drop("Unnamed: 0.1", axis=1)
database = database.drop("Unnamed: 0.1.1", axis=1)

In [93]:
"""
add summary to each organization
"""
from gensim.summarization.summarizer import summarize
from gensim.summarization import keywords

def summarize_text(text):
    return summarize(text, word_count=100)

database["AutoSummary"] = np.vectorize(summarize_text)(database["Crawled"])
database

Unnamed: 0,Organisation,Website,Crawled,AutoSummary
0,The Africa Enterprise Challenge Fund (AECF),http://www.aecfafrica.org/,The AECF runs competitions targeting specific ...,"To achieve this, we will continue to focus on ..."
1,Acumen Fund,http://acumen.org/,Our entrepreneurs are unafraid to learn at the...,"But if you take each day at a time, there are ..."
2,African Development Fund,http://www.afdb.org/en/about-us/corporate-info...,IDEV is an independent unit tasked with enhanc...,IDEV is an independent unit tasked with enhanc...
3,Vista Ventures Social Impact Fund,http://www.vistaventures.com/,Vista Ventures Social Impact Fund focuses on s...,As practiced by Vista Ventures Social Impact F...
4,LGT Venture Philanthropy Foundation,https://www.lgtvp.com/en/,"At LGT Venture Philanthropy, we provide philan...",At LGT Capital Partners we want to develop tru...
5,Grofin,http://www.grofin.com,GroFin is pleased to announce that it is the w...,---AfghanistanAlbaniaAlgeriaAmerican SamoaAndo...
6,African Development Bank (AfDB),www.afdb.org,Africa’s slow progress on social indicators ca...,
7,CDC Group plc,www.cdcgroup.com,Impact investing is an industry that’s grown m...,"From a development perspective, we saw an oppo..."
8,Netherlands Development Finance Company (FMO),http://www.fmo.nl/,Choosing a career at FMO means being professio...,FMO’s nimble approach and experience in mobili...
9,Norwegian Investment,http://www.norfund.no/,Norfund’s mandate is to contribute to the deve...,And Norfund has been growing fast as a team – ...


In [None]:
"""
Save the processed database
"""
database.to_csv("crawled_database.csv")

In [33]:
class TextCleaner():
    """
    A class that cleans up text data
    """
    def __init__(self):
        # load stop words
        self.stop_words = stop_words.get_stop_words("en")

        # prepare stemmer
        self.stemmer = PorterStemmer()
    
    def clean(self, text):
        # remove non-alphanumerical letters
        text = re.sub("[^a-zA-Z]+", " ", text)
    
        # stem words
        text = self.stemmer.stem_sentence(text)
        
        return text

In [36]:
class TopicModelTrainer():
    """
    The class for training lsi model
    """
    def __init__(self, database_dir="crawled_database.csv"):
        """
        Initialize
        """
        
        # load database
        self.db = pd.read_csv(database_dir)
        
        # load string cleaner
        self.cl = TextCleaner()
        
    def train(self, model_dir="index_models"):
        """
        Train lsi model, including training of Dictionary, Tfidf and Lsi
        And Save them
        """
        # create index model folder
        try:
            os.mkdir(model_dir)
        except:
            pass
        
        # preprocess training documents
        # exclude all invalid text data
        training_docs = [row for row in self.db.iloc[:, -1] if row not in (np.nan, "Main site not accessible")]
        # clean the text
        training_docs = [self.cl.clean(doc) for doc in training_docs]
        
        # get dictionary
        self.dictionary = corpora.Dictionary([doc.split() for doc in training_docs])
        
        # remove organization specific words
        self.dictionary.filter_extremes(no_below=2, no_above=1)
        self.dictionary.compactify()
        
        # prepare tfidf
        training_bows = [self.dictionary.doc2bow(doc.split()) for doc in training_docs]
        self.tfidf = models.TfidfModel(training_bows)
        
        # prepare lsi model
        self.lsi = models.LsiModel(self.tfidf[training_bows], 300, id2word=self.dictionary)
        
        # save to file
        self.dictionary.save(model_dir + "/" + "dictionary")
        self.tfidf.save(model_dir + "/" + "tfidf")
        self.lsi.save(model_dir + "/" + "lsi")
        
    def index_text(self, text):
        """
        Index a given string use lsi model
        """
        if text not in (np.nan, "Main site not accessible"):
            # clean the text
            text = self.cl.clean(text)
            
            # get lsi index
            lsi_index = self.lsi[self.tfidf[self.dictionary.doc2bow(text.split())]]
            if(len(lsi_index)==0):
                return np.zeros(300)
            
            # get np.array of index
            lsi_index = np.array(list(zip(*lsi_index))[1])
            
            return lsi_index
            
        else: # discard empty texts
            return np.array([])
    
    def index_database(self, model_dir="index_models"):
        """
        Use topic model to index each investor
        And Save it
        """
        # index the database and append it to a new column
        self.db["Index"] = np.vectorize(self.index_text, otypes=[np.ndarray])(self.db["Crawled"])
        
        # save to file
        self.db.to_csv(model_dir + "/indexed_database.csv")

In [37]:
tt = TopicModelTrainer()
tt.train()
tt.index_database()

In [38]:
class TopicModelIndexer():
    """
    Class for find similarity of each orgnization to a given startup description
    """
    def __init__(self, folder_dir="index_models"):
        """
        Initialize
        """
        # load indexed database
        self.db = pd.read_csv(folder_dir + "/indexed_database.csv")
        print(self.db.columns)
        self.db["Index"] = self.db["Index"].apply(self.literal_eval)
        
        # load string cleaner
        self.cl = TextCleaner()
        
        # load lsi modules: dictionary, tfidf, lsi
        self.dictionary = corpora.Dictionary.load(folder_dir + "/dictionary")
        self.tfidf = models.TfidfModel.load(folder_dir + "/tfidf")
        self.lsi = models.LsiModel.load(folder_dir + "/lsi")
        
    def literal_eval(self, list_string):
        """
        convert string of list to list
        """
        # remove square bracket
        list_string = re.sub("[\[\]]", ' ', list_string)
        
        # convert str to list
        out = [np.float32(number) for number in list_string.split()]
        
        return np.array(out)

    def index_text(self, text):
        """
        Index a given string use lsi model
        """
        # clean the text
        text = self.cl.clean(text)

        # get lsi index
        lsi_index = self.lsi[self.tfidf[self.dictionary.doc2bow(text.split())]]

        # get np.array of index
        lsi_index = np.array(list(zip(*lsi_index))[1])

        return lsi_index
    
    def cos_sim(self, vector, text):
        """
        Dot to find cos similarity
        """
        if len(vector) == len(self.index_text(text)):
            return np.dot(vector, self.index_text(text))
        else:
            return 0
    
    def index_database(self, text, db_dir="indexed_database.csv"):
        """
        Use topic model to index each investor
        And Save it
        """
        # index the database and append it to a new column
        self.db["Similarity"] = np.vectorize(self.cos_sim)(self.db["Index"], text)
        
        # sort the database by its similarity value
        self.db.sort_values(by=["Similarity"], ascending=False)

In [39]:
ti = TopicModelIndexer()

Index(['Unnamed: 0', 'Unnamed: 0.1', 'Unnamed: 0.1.1', 'Unnamed: 0.1.1',
       'Organisation', 'Website', 'Crawled', 'Index'],
      dtype='object')


In [40]:
ti.index_database("new start up aiming at low income customers, dedicated in green energy")
ti.db

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,Organisation,Website,Crawled,Index,Similarity
0,0,0,1,1,The Africa Enterprise Challenge Fund (AECF),http://www.aecfafrica.org/,The AECF runs competitions targeting specific ...,"[-0.38837832, 0.02878414, -0.31853217, 0.08206...",0.02549
1,1,1,4,4,Acumen Fund,http://acumen.org/,Our entrepreneurs are unafraid to learn at the...,"[-0.34838542, 0.07019102, 0.28139323, 0.008248...",0.029052
2,2,2,6,6,African Development Fund,http://www.afdb.org/en/about-us/corporate-info...,IDEV is an independent unit tasked with enhanc...,"[-0.07164744, 0.00592385, -0.0724005, -0.02336...",0.000419
3,3,3,8,8,Vista Ventures Social Impact Fund,http://www.vistaventures.com/,Vista Ventures Social Impact Fund focuses on s...,"[-0.29134637, -0.15178476, 0.01503241, -0.0529...",0.018046
4,4,4,11,11,LGT Venture Philanthropy Foundation,https://www.lgtvp.com/en/,"At LGT Venture Philanthropy, we provide philan...","[-0.1578331, 0.020682137, 0.04242708, -0.08402...",0.0032
5,5,5,12,12,Grofin,http://www.grofin.com,GroFin is pleased to announce that it is the w...,"[-0.21750526, -0.9691681, 0.048680473, 0.02132...",0.008338
6,6,6,20,20,African Development Bank (AfDB),www.afdb.org,Africa’s slow progress on social indicators ca...,"[-0.44114047, 0.034710385, -0.2825969, -0.0124...",0.026595
7,7,7,21,21,CDC Group plc,www.cdcgroup.com,Impact investing is an industry that’s grown m...,"[-0.262092, 0.06222852, 0.32046902, 0.07349992...",0.005988
8,8,8,23,23,Netherlands Development Finance Company (FMO),http://www.fmo.nl/,Choosing a career at FMO means being professio...,"[-0.13299, 0.01376124, -0.05555093, 0.04336066...",0.023636
9,9,9,24,24,Norwegian Investment,http://www.norfund.no/,Norfund’s mandate is to contribute to the deve...,"[-0.13694702, -0.02353018, -0.08218449, 0.1007...",0.014126
