In [5]:
import requests
import csv

In [6]:

API_KEY = "AIzaSyDX_kJ-HB9cs0Z8KYDvzhENTDWw3oKTTdQ"
API_URL = "https://pagespeedonline.googleapis.com/pagespeedonline/v5/runPagespeed"
INPUT_FILE = "../../Warehouse/Links/fam_links.csv"
OUTPUT_FILE = "website_metrics.csv"

In [7]:
def read_urls_from_csv(file):
    with open(file, "r", newline="", encoding="utf-8") as csvfile:
        reader = csv.reader(csvfile)
        urls = set()
        for row in reader:
            urls.add(row[0])
        return list(urls)

def evaluate_website(url):
    params = {"url": url, "key": API_KEY}
    response = requests.get(API_URL, params=params)
    response_json = response.json()

    if "lighthouseResult" not in response_json:
        return None

    overall_score = response_json["lighthouseResult"]["categories"]["performance"]["score"]

    metrics = {
        "url": url,
        "overall_score": format(overall_score, ".8f"),
        "first_contentful_paint": response_json["lighthouseResult"]["audits"]["first-contentful-paint"]["numericValue"],
        "speed_index": response_json["lighthouseResult"]["audits"]["speed-index"]["numericValue"],
        "largest_contentful_paint": response_json["lighthouseResult"]["audits"]["largest-contentful-paint"]["numericValue"],
        "interactive": response_json["lighthouseResult"]["audits"]["interactive"]["numericValue"],
    }
    return metrics

def save_results_to_csv(result, file):
    with open(file, "a", newline="", encoding="utf-8") as csvfile:
        fieldnames = ["url", "overall_score", "first_contentful_paint", "speed_index", "largest_contentful_paint", "interactive"]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        if csvfile.tell() == 0:
            writer.writeheader()

        writer.writerow(result)

def save_failed_urls_to_csv(failed_urls, file):
    with open(file, "a", newline="", encoding="utf-8") as csvfile:
        fieldnames = ["url", "error"]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for failed_url in failed_urls:
            writer.writerow(failed_url)

In [9]:
urls = read_urls_from_csv(INPUT_FILE)
total_urls = len(urls)
failed_urls = []

for index, url in enumerate(urls):
    try:
        result = evaluate_website(url)
        if result:
            save_results_to_csv(result, OUTPUT_FILE)
            print(f"Progress: {index + 1}/{total_urls} - URL: {url} - Status: Success")
        else:
            error_message = "'lighthouseResult' not found"
            failed_urls.append({"url": url, "error": error_message})
            print(f"Progress: {index + 1}/{total_urls} - URL: {url} - Status: Failed - Error: {error_message}")
    except Exception as e:
        error_message = str(e)
        failed_urls.append({"url": url, "error": error_message})
        print(f"Progress: {index + 1}/{total_urls} - URL: {url} - Status: Failed - Error: {error_message}")
        save_failed_urls_to_csv(failed_urls, "failed_urls.csv")

Progress: 1/166 - URL: https://fampay.in/blog/healthy-wealth-how-can-parents-have-a-healthy-discussion-about-money-with-their-children/ - Status: Failed - Error: unsupported format string passed to NoneType.__format__
Progress: 2/166 - URL: https://fampay.in/partner - Status: Success
Progress: 3/166 - URL: https://fampay.in/blog/author/shifa/ - Status: Success
Progress: 4/166 - URL: https://fampay.in/blog/zetas-fusion-to-power-fampays-cashless-payments-for-gen-z/ - Status: Failed - Error: unsupported format string passed to NoneType.__format__
Progress: 5/166 - URL: https://fampay.in/blog/pocket-money-for-teenagers-why-it-matters/ - Status: Failed - Error: unsupported format string passed to NoneType.__format__
Progress: 6/166 - URL: https://fampay.in/blog/this-fintech-startup-by-iit-roorkee-alum-is-giving-teenagers-financial-freedom-responsibly-yourstory/ - Status: Failed - Error: 'lighthouseResult' not found
Progress: 7/166 - URL: https://fampay.in/blog/author/priyanka/ - Status: Suc