In [1]:
import csv
import os
import subprocess
import json
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from concurrent.futures import ThreadPoolExecutor

In [8]:
def run_lighthouse(url):
    cmd = f"lighthouse {url} --output=json --quiet"
    result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    
    if result.returncode != 0:
        print(f"Error running Lighthouse command for {url}: {result.stderr.decode()}")
        return None
    else:
        try:
            return json.loads(result.stdout)
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON for {url}: {e}")
            return None

In [9]:
def generate_report(url):
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # Run Chrome in headless mode
    driver = webdriver.Chrome(options=chrome_options)
    
    driver.get(url)
    report = run_lighthouse(url)
    
    driver.quit()
    
    return report

In [10]:
def save_report(report, filename, url, report_id):
    if report is not None:
        important_scores = {
            'id': report_id,
            'url': url,
            'performance': report['categories']['performance']['score'] * 100,
            'accessibility': report['categories']['accessibility']['score'] * 100,
            'best-practices': report['categories']['best-practices']['score'] * 100,
            'seo': report['categories']['seo']['score'] * 100
        }
        
        # Read the existing content if the file exists
        if os.path.exists(filename):
            with open(filename, 'r') as f:
                try:
                    data = json.load(f)
                except json.JSONDecodeError:
                    data = {'lighthouseReports': []}
        else:
            data = {'lighthouseReports': []}
        
        # Append the new report to the list
        data['lighthouseReports'].append(important_scores)
        
        # Write the updated data back to the file
        with open(filename, 'w') as f:
            json.dump(data, f, indent=2)
            print(f"Report saved for {filename}")
        
        # Print scores to the console
        print("Lighthouse Performance Metrics:")
        for category, score in important_scores.items():
            if category not in ['id', 'url']:
                print(f"{category.capitalize()} score: {score}")
    else:
        print(f"No report generated for {filename}")

In [11]:
def thread_report(csvreader):
    for report_id, row in enumerate(csvreader):
            url = 'https://' + row[0] + '/'
            print(url)
            report = generate_report(url)
            # filename = f"lighthouse_report_{url.replace('https://', '').replace('/', '_')}.json"
            save_report(report, "lighthouse_report.json", url, report_id)



In [13]:
def process_chunk(chunk, start_id):
    for report_id, row in enumerate(chunk):
        url = 'https://' + row[0] + '/'
        print(url)
        report = generate_report(url)
        save_report(report, "lighthouse_report.json", url, start_id + report_id)

def chunked_csv_reader(file_path, chunk_size):
    with open(file_path, newline='') as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader)  # Skip the header
        chunk = []
        for row in csvreader:
            chunk.append(row)
            if len(chunk) == chunk_size:
                yield chunk
                chunk = []
        if chunk:  # Yield the last chunk if it's not empty
            yield chunk

if __name__ == "__main__":
    chunk_size = 10  # Adjust chunk size as needed
    file_path = 'WebScreenshots.csv'

    with ThreadPoolExecutor(max_workers=10) as executor:  # Adjust number of workers as needed
        futures = []
        start_id = 0
        for chunk in chunked_csv_reader(file_path, chunk_size):
            futures.append(executor.submit(process_chunk, chunk, start_id))
            start_id += len(chunk)

        for future in futures:
            future.result()  # Wait for all futures to complete


https://http://1k62.com/https://http://altis.com.pl/

https://http://bardonsoliver.com/
https://http://car-tool.ru/
https://http://deltacorp.com.ua/
https://http://eko-klimat.pl/
https://http://etn.pl/
https://http://framingtech.com/
https://http://graffpinkert.com/
https://http://hipressuresystems.com/


WebDriverException: Message: unknown error: net::ERR_NAME_NOT_RESOLVED
  (Session info: chrome-headless-shell=124.0.6367.208)
Stacktrace:
	GetHandleVerifier [0x00007FF7C3D61522+60802]
	(No symbol) [0x00007FF7C3CDAC22]
	(No symbol) [0x00007FF7C3B97CE4]
	(No symbol) [0x00007FF7C3B905B3]
	(No symbol) [0x00007FF7C3B813E4]
	(No symbol) [0x00007FF7C3B82C14]
	(No symbol) [0x00007FF7C3B81711]
	(No symbol) [0x00007FF7C3B81049]
	(No symbol) [0x00007FF7C3B80D42]
	(No symbol) [0x00007FF7C3B7EBE4]
	(No symbol) [0x00007FF7C3B7F22C]
	(No symbol) [0x00007FF7C3B9A9F9]
	(No symbol) [0x00007FF7C3C2AB7E]
	(No symbol) [0x00007FF7C3C0AB7A]
	(No symbol) [0x00007FF7C3C2A224]
	(No symbol) [0x00007FF7C3C0A923]
	(No symbol) [0x00007FF7C3BD8FEC]
	(No symbol) [0x00007FF7C3BD9C21]
	GetHandleVerifier [0x00007FF7C40641BD+3217949]
	GetHandleVerifier [0x00007FF7C40A6157+3488183]
	GetHandleVerifier [0x00007FF7C409F0DF+3459391]
	GetHandleVerifier [0x00007FF7C3E1B8E6+823622]
	(No symbol) [0x00007FF7C3CE5FBF]
	(No symbol) [0x00007FF7C3CE0EE4]
	(No symbol) [0x00007FF7C3CE1072]
	(No symbol) [0x00007FF7C3CD18C4]
	BaseThreadInitThunk [0x00007FFA3D6E7344+20]
	RtlUserThreadStart [0x00007FFA3D8226B1+33]
