In [2]:
# types
import concurrent
import csv
import json
import os
import shutil
import subprocess
import threading
from concurrent.futures import ThreadPoolExecutor
from typing import List

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

In [3]:
def run_lighthouse(url):
    """Run the Lighthouse CLI against ``url`` and return the parsed JSON report.

    The command is handed to ``subprocess.run`` as an argument list rather
    than an interpolated ``shell=True`` string, so the URL is passed as a
    single argument instead of being spliced into shell syntax.

    NOTE(review): on Windows the npm-installed ``lighthouse`` is a ``.cmd``
    shim, which the OS still runs through ``cmd.exe``; URLs from untrusted
    sources should be validated before reaching this function.

    Args:
        url: Address of the page to audit.

    Returns:
        The decoded Lighthouse report, or ``None`` when the executable cannot
        be found, exits with a non-zero status, or emits invalid JSON.
    """
    # Resolve the full path up front: without a shell, a bare "lighthouse"
    # would not match the "lighthouse.cmd" shim on Windows.
    executable = shutil.which("lighthouse")
    if executable is None:
        print(f"Error running Lighthouse command for {url}: 'lighthouse' executable not found on PATH")
        return None

    cmd = [executable, url, "--output=json", "--quiet"]
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    if result.returncode != 0:
        # errors="replace": console output is not guaranteed to be valid UTF-8
        # and a decode failure here would hide the real error.
        print(f"Error running Lighthouse command for {url}: {result.stderr.decode(errors='replace')}")
        return None

    try:
        return json.loads(result.stdout)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON for {url}: {e}")
        return None

In [4]:
def generate_report(url):
    """Open ``url`` in headless Chrome, then run Lighthouse against it.

    Args:
        url: Address of the page to load and audit.

    Returns:
        Whatever ``run_lighthouse(url)`` returns (the parsed report or ``None``).

    Raises:
        Any exception raised by the Selenium driver (for example when the
        host name cannot be resolved) is propagated to the caller after the
        browser has been shut down.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # Run Chrome in headless mode
    driver = webdriver.Chrome(options=chrome_options)

    try:
        driver.get(url)
        return run_lighthouse(url)
    finally:
        # Always release the browser: a failed navigation used to skip
        # quit() and leave a headless Chrome process behind per bad URL.
        driver.quit()

In [5]:
# Serialises the read-modify-write cycle on the report file: save_report is
# called from several worker threads that all append to the same JSON file.
_REPORT_FILE_LOCK = threading.Lock()

# Lighthouse categories copied into the summary, in output order.
_SCORE_CATEGORIES = ('performance', 'accessibility', 'best-practices', 'seo')


def _score_percent(report, category):
    """Return ``category``'s score scaled to 0-100, or None if missing/null."""
    categories = report.get('categories') or {}
    entry = categories.get(category) or {}
    score = entry.get('score')
    if score is None:
        return None
    # round() hides float artefacts such as 0.57 * 100 == 56.99999999999999
    return round(score * 100, 2)


def save_report(report, filename, url, report_id):
    """Append the headline Lighthouse scores for ``url`` to a JSON file.

    The file holds ``{"lighthouseReports": [...]}``; it is created when
    absent and reset when it cannot be parsed as JSON. The scores are also
    printed to the console.

    Args:
        report: Parsed Lighthouse report, or ``None`` when no report exists.
        filename: Path of the JSON file to append to.
        url: URL the report belongs to.
        report_id: Identifier stored alongside the scores.

    Returns:
        None.
    """
    if report is None:
        # Report the URL, not the (shared) output filename, so the failing
        # page can actually be identified.
        print(f"No report generated for {url}")
        return

    important_scores = {'id': report_id, 'url': url}
    for category in _SCORE_CATEGORIES:
        important_scores[category] = _score_percent(report, category)

    with _REPORT_FILE_LOCK:
        # Read the existing content if the file exists
        data = None
        if os.path.exists(filename):
            with open(filename, 'r', encoding='utf-8') as f:
                try:
                    data = json.load(f)
                except json.JSONDecodeError:
                    data = None

        # Tolerate a missing/corrupt file or an unexpected JSON shape
        if not isinstance(data, dict):
            data = {}
        reports = data.get('lighthouseReports')
        if not isinstance(reports, list):
            reports = data['lighthouseReports'] = []

        # Append the new report to the list
        reports.append(important_scores)

        # Write the updated data back to the file
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)
        print(f"Report saved for {filename}")

    # Print scores to the console
    print("Lighthouse Performance Metrics:")
    for category in _SCORE_CATEGORIES:
        print(f"{category.capitalize()} score: {important_scores[category]}")

In [6]:
def thread_report(csvreader):
    """Generate and store a Lighthouse report for each row of ``csvreader``.

    The first column of every row is used as the URL, and the row's
    zero-based position in the iteration becomes its report id. All results
    are appended to ``lighthouse_report.json``.
    """
    output_file = "lighthouse_report.json"
    position = 0
    for record in csvreader:
        target = record[0]
        print(target)
        save_report(generate_report(target), output_file, target, position)
        position += 1



In [9]:
import concurrent.futures


def process_chunk(chunk, start_id):
    """Generate and save a Lighthouse report for every row in ``chunk``.

    Processing is best-effort: a failure for one URL is printed and the
    remaining rows are still handled.

    Args:
        chunk: Sequence of CSV rows; the first column of each row is the URL.
        start_id: Report id assigned to the first row of the chunk; later
            rows get consecutive ids (skipped rows still consume an id).

    Returns:
        None.
    """
    for report_id, row in enumerate(chunk, start=start_id):
        # csv.reader yields [] for blank lines; indexing such a row used to
        # raise outside the try block and abort the rest of the chunk.
        url = row[0].strip() if row else ""
        if not url:
            print(f"Skipping row {report_id}: no URL in first column")
            continue

        print(url, "\n")
        try:
            report = generate_report(url)
            save_report(report, "lighthouse_report.json", url, report_id)
        except Exception as e:
            # Deliberately broad so one bad site cannot stop the batch;
            # name the URL because output from worker threads interleaves.
            print(f"Something went wrong while generating the report for {url}")
            print(e)


def chunked_csv_reader(file_path, chunk_size):
    """Yield the data rows of a CSV file in lists of up to ``chunk_size`` rows.

    The first row is treated as a header and skipped. An empty file yields
    nothing.

    Args:
        file_path: Path of the CSV file to read.
        chunk_size: Number of rows per yielded chunk; the final chunk may be
            shorter.

    Yields:
        Lists of rows, each row being the list of strings from ``csv.reader``.
    """
    with open(file_path, newline="") as csvfile:
        csvreader = csv.reader(csvfile)
        # Skip the header. The default matters: a bare next() on an empty
        # file raises StopIteration, which inside a generator surfaces as
        # RuntimeError.
        next(csvreader, None)
        chunk = []
        for row in csvreader:
            chunk.append(row)
            if len(chunk) == chunk_size:
                yield chunk
                chunk = []
        if chunk:  # Yield the last chunk if it's not empty
            yield chunk


if __name__ == "__main__":
    # Input CSV (first column = URL) and number of rows handed to each task.
    file_path = "./WebScreenshots.csv"
    chunk_size = 10  # Adjust chunk size as needed

    futures: List[concurrent.futures.Future] = []
    start_id = 0  # id of the first row of the next chunk, keeps ids unique

    # Adjust number of workers as needed
    with ThreadPoolExecutor(max_workers=10) as executor:
        # One task per chunk; each task walks its rows sequentially.
        for chunk in chunked_csv_reader(file_path, chunk_size):
            futures.append(executor.submit(process_chunk, chunk, start_id))
            start_id += len(chunk)

        # Block until every task has finished; result() re-raises any
        # exception that escaped a worker.
        for future in futures:
            future.result()

http://1k62.com 

Something went wrong while generating the report
Message: unknown error: net::ERR_NAME_NOT_RESOLVED
  (Session info: chrome-headless-shell=125.0.6422.77)
Stacktrace:
	GetHandleVerifier [0x00007FF7BFE31F22+60322]
	(No symbol) [0x00007FF7BFDACE99]
	(No symbol) [0x00007FF7BFC67EBA]
	(No symbol) [0x00007FF7BFC5FEA2]
	(No symbol) [0x00007FF7BFC509C4]
	(No symbol) [0x00007FF7BFC52212]
	(No symbol) [0x00007FF7BFC50CF1]
	(No symbol) [0x00007FF7BFC504D1]
	(No symbol) [0x00007FF7BFC50410]
	(No symbol) [0x00007FF7BFC4E39D]
	(No symbol) [0x00007FF7BFC4E9DC]
	(No symbol) [0x00007FF7BFC6AC91]
	(No symbol) [0x00007FF7BFCFC5FE]
	(No symbol) [0x00007FF7BFCDC21A]
	(No symbol) [0x00007FF7BFCFBC80]
	(No symbol) [0x00007FF7BFCDBFC3]
	(No symbol) [0x00007FF7BFCA9617]
	(No symbol) [0x00007FF7BFCAA211]
	GetHandleVerifier [0x00007FF7C014946D+3301613]
	GetHandleVerifier [0x00007FF7C0193693+3605267]
	GetHandleVerifier [0x00007FF7C0189410+3563664]
	GetHandleVerifier [0x00007FF7BFEE42F6+790390]
	