In [2]:
from bs4 import BeautifulSoup
import pandas as pd
import re
import os

# Directory containing the HTML report files to process
html_files_directory = '/Users/nirmalkapilarathne/Desktop/lighthouse_dekstop/React'

# Metric titles searched for in each report; they double as CSV column names
metrics = ["First Contentful Paint", "Largest Contentful Paint",
           "Total Blocking Time", "Cumulative Layout Shift", "Speed Index"]

# Patterns are compiled once here rather than once per metric per file.
# Each one captures the first "displayValue" string that follows the metric
# title on the same line of the report text.
metric_patterns = {
    metric: re.compile(f'{metric}.*?"displayValue":"(.*?)"')
    for metric in metrics
}

# Captures whatever sits between '"score":' and the next '}' for the entry
# whose id is "performance".
performance_pattern = re.compile(r'"id":"performance","score":(.*?)}')


def extract_metrics(html_content):
    """Extract the metric display values and performance score from one report.

    Args:
        html_content: Raw text of a report file. The patterns are applied to
            the text directly, so the values are presumably read from JSON
            embedded in the page (NOTE(review): confirm against a real report).

    Returns:
        A dict mapping each metric title that was found to its stripped
        display string, plus ``'Performance'`` mapped to the score scaled to
        0-100 when a numeric score was found. Missing entries are omitted.
    """
    data = {}

    for metric, pattern in metric_patterns.items():
        match = pattern.search(html_content)
        if match:
            value = match.group(1).strip()
            data[metric] = value
            print(f"Extracted {metric}: {value}")

    performance_match = performance_pattern.search(html_content)
    if performance_match:
        raw_score = performance_match.group(1)
        try:
            # Rounding removes binary floating-point noise introduced by the
            # scaling (0.57 * 100 would otherwise be 56.99999999999999).
            performance_score = round(float(raw_score) * 100, 2)
        except ValueError:
            # A non-numeric score (for example JSON null) must not abort the
            # whole run; the row simply gets no Performance value.
            print("Performance score is not numeric, skipping:", raw_score)
        else:
            data['Performance'] = performance_score
            print("Extracted Performance Score:", performance_score)

    return data


def collect_performance_metrics(directory):
    """Build one DataFrame row per ``.html`` file found in *directory*.

    Files are visited in sorted name order so the row order is reproducible
    (``os.listdir`` makes no ordering guarantee).

    Args:
        directory: Path of the directory to scan (not recursive).

    Returns:
        A DataFrame with one column per entry of ``metrics`` followed by
        ``'Performance'``. Values a report does not provide are left missing.
        The frame has zero rows when the directory holds no ``.html`` files.

    Raises:
        OSError: If the directory or one of its files cannot be read.
    """
    rows = []

    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".html"):
            continue

        file_path = os.path.join(directory, filename)
        print("Processing file:", file_path)

        with open(file_path, 'r', encoding='utf-8') as file:
            html_content = file.read()

        rows.append(extract_metrics(html_content))
        print("Data extracted from file:", filename)

    # Passing the column list explicitly keeps the CSV schema stable even
    # when some reports lack a metric, and also covers the zero-file case.
    return pd.DataFrame(rows, columns=metrics + ['Performance'])


if __name__ == "__main__":
    all_data = collect_performance_metrics(html_files_directory)

    if all_data.empty:
        print("No HTML files found in:", html_files_directory)
    else:
        # Save to CSV
        all_data.to_csv('react_all_performance_metrics.csv', index=False)
        print("Data saved to CSV file: react_all_performance_metrics.csv")
