In [14]:
from PythonPSI.api import PSI
import pandas as pd
from ipynb.fs.full.functions import *

### Notes
This function is pretty slow, because it takes a while to get all the needed data with the API, especially when calling a lot of urls. With 50 urls, it takes about `20 minutes` to run.

Not every url returns useable performance data, since the API has some restrictions. Some webpages require too many queries or the request simply fails. With 50 urls, about `30 urls` return useable data. The results can vary.

This function will need some refactoring at some point to enhance performance.

In [21]:
# returns a dict with the cls, fcp, fid and lcp from given websites dataframe with performance data
def GetSpecificPerformanceData(df):
    url = df.loc["CUMULATIVE_LAYOUT_SHIFT_SCORE","id"]
    cls = pd.DataFrame(df.loc["CUMULATIVE_LAYOUT_SHIFT_SCORE","metrics"]).percentile.iloc[0]
    fcp = pd.DataFrame(df.loc["FIRST_CONTENTFUL_PAINT_MS","metrics"]).percentile.iloc[0]
    fid = pd.DataFrame(df.loc["FIRST_INPUT_DELAY_MS","metrics"]).percentile.iloc[0]
    lcp = pd.DataFrame(df.loc["LARGEST_CONTENTFUL_PAINT_MS","metrics"]).percentile.iloc[0]
    results = {"URL":url,"FCP":fcp,"LCP":lcp,"FID":fid,"CLS":cls}
    return results

# returns dataframe with website performance data
def GetWebsiteSpeed(df):
    rows_list = []
    urls = df["Ur"]

    for key,value in websites.iteritems():
        # get data from Google Pagespeed Insights API
        data = PSI(value, category='performance', locale='en', stratergy='desktop', metrics='loadingExperience')
        data_keys = list(data.keys())
        
        # don't add row to list when API call gives error or data is incomplete
        if data_keys != ['error']:
            if data_keys != ['initial_url']:
                rows_list.append(GetSpecificPerformanceData(pd.DataFrame(data)))
                counter = counter+1   
        else:
            rows_list.append({"URL":value,"FCP":"NaN","LCP":"NaN","FID":"NaN","CLS":"NaN"})
        # API restricts from too many calls per minute, so wait a bit
        # this should be refactored to enhance performance
        time.sleep(12) 
       
    return pd.DataFrame(rows_list) 

In [22]:
d = {'Ur': ["https://www.bouwmaat.com/bouwmaterialen", 
            "https://www.bouwbestel.nl/bouwmaterialen.html",
            "https://www.hornbach.nl/shop/Bouwmateriaal/S4471/artikeloverzicht.html",
            "https://www.online-bouwmaterialen.nl/",
            "https://www.bouwonline.com/"]}
df = pd.DataFrame(data=d)
joe = GetWebsiteSpeed(df)
joe

['id', 'metrics', 'overall_category', 'initial_url']
https://www.bouwmaat.nl/bouwmaterialen
{'id': 'https://www.bouwmaat.nl/bouwmaterialen', 'metrics': {'CUMULATIVE_LAYOUT_SHIFT_SCORE': {'percentile': 82, 'distributions': [{'min': 0, 'max': 10, 'proportion': 0.08292563600782796}, {'min': 10, 'max': 25, 'proportion': 0.018835616438356205}, {'min': 25, 'proportion': 0.8982387475538158}], 'category': 'SLOW'}, 'FIRST_CONTENTFUL_PAINT_MS': {'percentile': 1216, 'distributions': [{'min': 0, 'max': 1000, 'proportion': 0.6569238140872065}, {'min': 1000, 'max': 3000, 'proportion': 0.30378533780546235}, {'min': 3000, 'proportion': 0.039290848107331196}], 'category': 'AVERAGE'}, 'FIRST_INPUT_DELAY_MS': {'percentile': 5, 'distributions': [{'min': 0, 'max': 100, 'proportion': 0.946424568482404}, {'min': 100, 'max': 300, 'proportion': 0.0206231786594934}, {'min': 300, 'proportion': 0.03295225285810355}], 'category': 'FAST'}, 'LARGEST_CONTENTFUL_PAINT_MS': {'percentile': 2474, 'distributions': [{'min'

['id', 'metrics', 'overall_category', 'initial_url']
https://www.sleiderink.nl/bouwmaterialen
{'id': 'https://www.sleiderink.nl/bouwmaterialen', 'metrics': {'CUMULATIVE_LAYOUT_SHIFT_SCORE': {'percentile': 1, 'distributions': [{'min': 0, 'max': 10, 'proportion': 0.8380721220527049}, {'min': 10, 'max': 25, 'proportion': 0.07662968099861306}, {'min': 25, 'proportion': 0.0852981969486824}], 'category': 'FAST'}, 'FIRST_CONTENTFUL_PAINT_MS': {'percentile': 635, 'distributions': [{'min': 0, 'max': 1000, 'proportion': 0.917490623934538}, {'min': 1000, 'max': 3000, 'proportion': 0.08250937606546199}, {'min': 3000, 'proportion': 0}], 'category': 'FAST'}, 'FIRST_INPUT_DELAY_MS': {'percentile': 5, 'distributions': [{'min': 0, 'max': 100, 'proportion': 1}, {'min': 100, 'max': 300, 'proportion': 0}, {'min': 300, 'proportion': 0}], 'category': 'FAST'}, 'LARGEST_CONTENTFUL_PAINT_MS': {'percentile': 814, 'distributions': [{'min': 0, 'max': 2500, 'proportion': 0.9905309435238415}, {'min': 2500, 'max': 4

['id', 'metrics', 'overall_category', 'initial_url', 'origin_fallback']
https://nl.wikipedia.org/wiki/Bouwmateriaal
{'id': 'https://nl.wikipedia.org', 'metrics': {'CUMULATIVE_LAYOUT_SHIFT_SCORE': {'percentile': 0, 'distributions': [{'min': 0, 'max': 10, 'proportion': 0.9503900780156029}, {'min': 10, 'max': 25, 'proportion': 0.008301660332066411}, {'min': 25, 'proportion': 0.04130826165233043}], 'category': 'FAST'}, 'FIRST_CONTENTFUL_PAINT_MS': {'percentile': 975, 'distributions': [{'min': 0, 'max': 1000, 'proportion': 0.766353270654134}, {'min': 1000, 'max': 3000, 'proportion': 0.21874374874975083}, {'min': 3000, 'proportion': 0.014902980596119256}], 'category': 'FAST'}, 'FIRST_INPUT_DELAY_MS': {'percentile': 3, 'distributions': [{'min': 0, 'max': 100, 'proportion': 0.9922984596919378}, {'min': 100, 'max': 300, 'proportion': 0.004500900180036004}, {'min': 300, 'proportion': 0.0032006401280255934}], 'category': 'FAST'}, 'LARGEST_CONTENTFUL_PAINT_MS': {'percentile': 1021, 'distributions'

['error']
https://www.bouwcenter.nl/driessen/bouwmaterialen/ : WEBSITE GIVES ERROR
COUNTER:  29
['id', 'metrics', 'overall_category', 'initial_url']
https://gebruiktebouwmaterialen.com/
{'id': 'https://gebruiktebouwmaterialen.com/', 'metrics': {'CUMULATIVE_LAYOUT_SHIFT_SCORE': {'percentile': 38, 'distributions': [{'min': 0, 'max': 10, 'proportion': 0.1484836549822759}, {'min': 10, 'max': 25, 'proportion': 0.3847971642378873}, {'min': 25, 'proportion': 0.46671918077983315}], 'category': 'SLOW'}, 'FIRST_CONTENTFUL_PAINT_MS': {'percentile': 1335, 'distributions': [{'min': 0, 'max': 1000, 'proportion': 0.4529780564263314}, {'min': 1000, 'max': 3000, 'proportion': 0.5262539184952967}, {'min': 3000, 'proportion': 0.020768025078369865}], 'category': 'AVERAGE'}, 'FIRST_INPUT_DELAY_MS': {'percentile': 3, 'distributions': [{'min': 0, 'max': 100, 'proportion': 0.9883720930232565}, {'min': 100, 'max': 300, 'proportion': 0.0072674418604651205}, {'min': 300, 'proportion': 0.004360465116279073}], 'ca

['error']
https://www.ggoedkoop.nl/ : WEBSITE GIVES ERROR
COUNTER:  40
['id', 'metrics', 'overall_category', 'initial_url', 'origin_fallback']
https://www.eco-bouwmaterialen.nl/
{'id': 'https://www.eco-bouwmaterialen.nl', 'metrics': {'CUMULATIVE_LAYOUT_SHIFT_SCORE': {'percentile': 57, 'distributions': [{'min': 0, 'max': 10, 'proportion': 0.23025768911055716}, {'min': 10, 'max': 25, 'proportion': 0.36845386533665875}, {'min': 25, 'proportion': 0.401288445552783}], 'category': 'SLOW'}, 'FIRST_CONTENTFUL_PAINT_MS': {'percentile': 2460, 'distributions': [{'min': 0, 'max': 1000, 'proportion': 0.06086238910666388}, {'min': 1000, 'max': 3000, 'proportion': 0.8136991953785843}, {'min': 3000, 'proportion': 0.1254384155147512}], 'category': 'AVERAGE'}, 'FIRST_INPUT_DELAY_MS': {'percentile': 7, 'distributions': [{'min': 0, 'max': 100, 'proportion': 0.9942733017377573}, {'min': 100, 'max': 300, 'proportion': 0.0023696682464454995}, {'min': 300, 'proportion': 0.003357030015797791}], 'category': 'FA

Amount of usable returns:  33
Run time:  19.297833383083344  minutes


Unnamed: 0,URL,FCP,LCP,FID,CLS
0,https://www.bouwmaat.nl/bouwmaterialen,1216.0,2474.0,5.0,82.0
1,https://www.hornbach.nl/shop/Bouwmateriaal/S44...,,,,
2,https://www.bouwbestel.nl/bouwmaterialen.html,841.0,1008.0,3.0,45.0
3,https://www.online-bouwmaterialen.nl/,984.0,1706.0,3.0,1.0
4,https://www.bmn.nl/,,,,
5,https://www.bouwonline.com/,,,,
6,https://www.bol.com/nl/m/bouwmaterialen/,528.0,609.0,3.0,7.0
7,https://www.pontmeyer.nl/,2679.0,3950.0,4.0,51.0
8,https://www.bouwmaterialenkopen.com/,,,,
9,https://www.debouwmarktshop.nl/,2473.0,2957.0,8.0,81.0
