In [1]:
pip install selenium

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.2.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import csv
import json
import os
import shutil
import subprocess
import threading
from concurrent.futures import ThreadPoolExecutor

from selenium import webdriver
from selenium.webdriver.chrome.options import Options



In [4]:
def run_lighthouse(url):
    """Run the Lighthouse CLI against ``url`` and return its parsed JSON report.

    Lighthouse is invoked with ``--output=json --quiet`` and whatever it
    writes to stdout is parsed as JSON.

    Args:
        url: Address of the page to audit.

    Returns:
        The decoded report, or ``None`` when the ``lighthouse`` executable
        cannot be found or started, when it exits with a non-zero status, or
        when its stdout is not valid JSON. A message is printed in each of
        those cases.
    """
    # Resolve the executable explicitly so a missing install is reported
    # clearly instead of surfacing as an opaque shell error.
    executable = shutil.which("lighthouse")
    if executable is None:
        print(f"Error running Lighthouse command for {url}: 'lighthouse' executable not found on PATH")
        return None

    # Pass the arguments as a list with no shell, so the URL is delivered as
    # one argument instead of being spliced into a shell command line.
    # NOTE(review): if the resolved executable is a Windows batch shim,
    # cmd.exe may still parse the arguments - confirm for URLs containing '&'.
    cmd = [executable, url, "--output=json", "--quiet"]
    try:
        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError as e:
        print(f"Error running Lighthouse command for {url}: {e}")
        return None

    if result.returncode != 0:
        # errors="replace": stderr may not be UTF-8, and a decode failure
        # must not hide the real error.
        message = result.stderr.decode(errors="replace")
        print(f"Error running Lighthouse command for {url}: {message}")
        return None

    try:
        return json.loads(result.stdout)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON for {url}: {e}")
        return None

In [5]:
def generate_report(url):
    """Load ``url`` in headless Chrome, then run Lighthouse on the same URL.

    The Selenium session only loads the page; Lighthouse is called with the
    URL itself, not with the driver.

    Args:
        url: Address of the page to audit.

    Returns:
        Whatever ``run_lighthouse(url)`` returns.

    Raises:
        Any exception raised while loading the page or running Lighthouse is
        propagated after the browser has been closed.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # Run Chrome in headless mode
    driver = webdriver.Chrome(options=chrome_options)

    try:
        driver.get(url)
        return run_lighthouse(url)
    finally:
        # Always release the browser, even when the page load or the audit
        # raises; otherwise each failure leaks a Chrome process.
        driver.quit()

In [6]:
# Serialises the read-modify-write cycle on the report file: save_report is
# called from several worker threads that all append to the same JSON file.
_REPORT_FILE_LOCK = threading.Lock()

# Lighthouse categories whose scores are extracted, in output order.
_SCORE_CATEGORIES = ('performance', 'accessibility', 'best-practices', 'seo')


def _category_percent(report, category):
    """Return ``category``'s score scaled to 0-100, or ``None`` if it is absent or null."""
    categories = report.get('categories') or {}
    score = (categories.get(category) or {}).get('score')
    if score is None:
        return None
    # round() removes binary float noise such as 0.57 * 100 == 56.99999999999999.
    return round(score * 100, 2)


def save_report(report, filename, url, report_id):
    """Append the headline scores of ``report`` to the JSON file ``filename``.

    The file holds an object with a ``lighthouseReports`` list. It is created
    if missing; if it exists but cannot be parsed, or does not contain that
    list, a fresh list is started. The scores are also printed.

    Args:
        report: Parsed Lighthouse report (a dict), or ``None`` if no report
            was produced. ``None`` only prints a message; nothing is written.
        filename: Path of the JSON file to append to.
        url: URL the report belongs to; stored in the entry.
        report_id: Identifier stored in the entry.

    Returns:
        None.
    """
    if report is None:
        # Name the URL that failed; the output file is the same for every call.
        print(f"No report generated for {url}")
        return

    important_scores = {'id': report_id, 'url': url}
    for category in _SCORE_CATEGORIES:
        important_scores[category] = _category_percent(report, category)

    with _REPORT_FILE_LOCK:
        # Read the existing content if the file exists
        data = None
        if os.path.exists(filename):
            with open(filename, 'r', encoding='utf-8') as f:
                try:
                    data = json.load(f)
                except json.JSONDecodeError:
                    data = None
        if not isinstance(data, dict):
            data = {}
        if not isinstance(data.get('lighthouseReports'), list):
            data['lighthouseReports'] = []

        # Append the new report to the list
        data['lighthouseReports'].append(important_scores)

        # Write the updated data back to the file
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)
        print(f"Report saved for {filename}")

    # Print scores to the console
    print("Lighthouse Performance Metrics:")
    for category in _SCORE_CATEGORIES:
        print(f"{category.capitalize()} score: {important_scores[category]}")

In [7]:
def thread_report(csvreader):
    """Audit each row of ``csvreader`` in turn and store the result.

    The first field of every row is wrapped as ``https://<field>/``, printed,
    passed to ``generate_report``, and the outcome is handed to
    ``save_report`` together with the row's zero-based position as its id.
    All results go to ``lighthouse_report.json``.

    Args:
        csvreader: Iterable of rows; each row's first item is read.
    """
    report_file = "lighthouse_report.json"
    position = 0
    for record in csvreader:
        url = "".join(('https://', record[0], '/'))
        print(url)
        outcome = generate_report(url)
        save_report(outcome, report_file, url, position)
        position += 1



In [9]:
def process_chunk(chunk, start_id, output_file="lighthouse_report.json"):
    """Audit every row of ``chunk`` and append each result to ``output_file``.

    The first field of each row is stripped of surrounding whitespace and
    wrapped as ``https://<field>/``. Rows that are empty or whose first
    field is blank are skipped. If generating a report raises, the error is
    printed and the remaining rows are still processed.

    Args:
        chunk: Sequence of rows; each row's first item is read.
        start_id: Id assigned to the first row of the chunk. Later rows get
            ``start_id`` plus their position in ``chunk``, so skipped rows
            leave gaps rather than shifting later ids.
        output_file: Path passed to ``save_report``.
    """
    for report_id, row in enumerate(chunk):
        host = row[0].strip() if row else ""
        if not host:
            # Blank rows would otherwise raise IndexError or yield 'https:///'.
            continue

        url = 'https://' + host + '/'
        print(url)
        try:
            report = generate_report(url)
        except Exception as e:
            # One failing site must not abandon the rest of the chunk.
            print(f"Error generating report for {url}: {e}")
            report = None
        save_report(report, output_file, url, start_id + report_id)

def chunked_csv_reader(file_path, chunk_size):
    """Yield the data rows of a CSV file in lists of at most ``chunk_size`` rows.

    The first row is treated as a header and skipped. An empty file yields
    nothing.

    Args:
        file_path: Path of the CSV file to read.
        chunk_size: Maximum number of rows per yielded chunk; must be >= 1.

    Yields:
        Lists of rows, each row being a list of strings from ``csv.reader``.
        The last chunk may be shorter than ``chunk_size``.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1. As this is a
            generator, the error is raised when iteration starts.
    """
    if chunk_size < 1:
        # With a non-positive size the length check below would never match
        # and the whole file would come out as one chunk.
        raise ValueError(f"chunk_size must be at least 1, got {chunk_size}")

    with open(file_path, newline='') as csvfile:
        csvreader = csv.reader(csvfile)
        # Skip the header. The default avoids StopIteration on an empty
        # file, which a generator would turn into RuntimeError.
        next(csvreader, None)
        chunk = []
        for row in csvreader:
            chunk.append(row)
            if len(chunk) == chunk_size:
                yield chunk
                chunk = []
        if chunk:  # Yield the last chunk if it's not empty
            yield chunk

if __name__ == "__main__":
    # Run configuration.
    file_path = 'adrian_url.csv'
    chunk_size = 2  # rows handed to each worker task

    with ThreadPoolExecutor(max_workers=10) as pool:
        pending = []
        next_id = 0
        # Submit one task per chunk; ids continue across chunks so every
        # row gets a distinct id.
        for batch in chunked_csv_reader(file_path, chunk_size):
            task = pool.submit(process_chunk, batch, next_id)
            pending.append(task)
            next_id = next_id + len(batch)

        # Block until every task has finished; result() re-raises any
        # exception raised inside a worker.
        for task in pending:
            task.result()


https://pokemon.com/
https://vercel.com/
Report saved for lighthouse_report.json
Lighthouse Performance Metrics:
Performance score: 39.0
Accessibility score: 100
Best-practices score: 75.0
Seo score: 100
https://medium.com/
Error running Lighthouse command for https://pokemon.com/: Runtime error encountered: Lighthouse was unable to reliably load the URL you requested because the page stopped responding.

No report generated for lighthouse_report.json
https://viz.com/
Report saved for lighthouse_report.json
Lighthouse Performance Metrics:
Performance score: 69.0
Accessibility score: 86.0
Best-practices score: 79.0
Seo score: 100
Report saved for lighthouse_report.json
Lighthouse Performance Metrics:
Performance score: 30.0
Accessibility score: 83.0
Best-practices score: 75.0
Seo score: 85.0
