In [2]:
!pip install haralyzer
from haralyzer import HarParser, HarPage
import json

files = ['jagran.har','deccan.har','mit.har','usach.har','sinu.har']

for _ in files:
    with open(_, 'r') as f:
        har_parser = HarParser(json.loads(f.read()))
    print(_)
    data = har_parser.har_data
    # print((data['entries'][0].keys()))
    ttfb = data['pages'][0]['pageTimings']['onContentLoad']
    page_load_time = data['pages'][0]['pageTimings']['onLoad']
    
    print("TTFB:", ttfb)
    print("Page Load Time:", page_load_time)
    
    get_request_count = 0
    
    # Iterate through each entry in the .har file
    for entry in data['entries']:
        # Check if the request method is "GET"
        if entry['request']['method'] == 'GET':
            get_request_count += 1
    
    print("Total number of HTTP GET requests:", get_request_count)
    
    mime_types = set()
    
    # Iterate through each entry in the .har file
    for entry in data['entries']:
        # Get the MIME type from the response object
        mime_type = entry['response']['content']['mimeType']
        # Add the MIME type to the set
        mime_types.add(mime_type)
    
    print("Different MIME types observed in the requests:")
    for mime_type in mime_types:
        print(mime_type)
    
    image_count = 0
    javascript_count = 0
    html_css_count = 0
    total_get_requests = 0
    
    # Iterate through each entry in the .har file
    for entry in data['entries']:
        # Check if the request method is "GET"
        if entry['request']['method'] == 'GET':
            total_get_requests += 1
            # Get the MIME type from the response object
            mime_type = entry['response']['content']['mimeType']
            # Classify the request based on MIME type
            if mime_type.startswith('image'):
                image_count += 1
            elif mime_type == 'application/javascript':
                javascript_count += 1
            elif mime_type in ['text/html', 'text/css']:
                html_css_count += 1
    
    # Calculate the fractions
    total_requests = image_count + javascript_count + html_css_count
    image_fraction = image_count / total_requests
    javascript_fraction = javascript_count / total_requests
    html_css_fraction = html_css_count / total_requests
    
    # Print the results
    print("Fraction of GET requests corresponding to:")
    print("(a) Images:", image_fraction)
    print("(b) JavaScript:", javascript_fraction)
    print("(c) HTML and CSS:", html_css_fraction)
    
    total_size = 0
    image_size = 0
    image_sizes = []
    # Iterate through each entry in the .har file
    for entry in data['entries']:
        # Get the size of the asset from the response object
        
        asset_size = entry['response']['content']['size']
        total_size += asset_size
        # Get the MIME type from the response object
        mime_type = entry['response']['content']['mimeType']
        # If the MIME type corresponds to an image, update image-related variables
        if mime_type.startswith('image'):
            image_size += asset_size
            image_sizes.append(asset_size)
    
    # Calculate the fraction of the size that constitutes images
    image_fraction = image_size / total_size
    
    # Calculate the mean and median size of the images
    if image_sizes:
        mean_image_size = sum(image_sizes) / len(image_sizes)
        sorted_image_sizes = sorted(image_sizes)
        median_index = len(sorted_image_sizes) // 2
        median_image_size = sorted_image_sizes[median_index] if len(sorted_image_sizes) % 2 != 0 else (sorted_image_sizes[median_index - 1] + sorted_image_sizes[median_index]) / 2
    else:
        mean_image_size = 0
        median_image_size = 0
    
    # Print the results
    print("Total size of assets downloaded:", total_size)
    print("Fraction of the size constituted by images:", image_fraction)
    print("Mean size of images:", mean_image_size)
    print("Median size of images:", median_image_size)

    print('\n')

jagran.har
TTFB: 1026.3929999782704
Page Load Time: 2046.6629999573343
Total number of HTTP GET requests: 539
Different MIME types observed in the requests:
application/json+protobuf
image/svg+xml
video/mp4
font/woff2
image/jpeg
text/css
image/gif
application/x-javascript
image/webp
application/json
text/json
image/png
application/octet-stream
text/html
x-unknown
image/x-icon
text/javascript
application/vnd.yt-ump
application/javascript
text/plain
Fraction of GET requests corresponding to:
(a) Images: 0.6277173913043478
(b) JavaScript: 0.22826086956521738
(c) HTML and CSS: 0.14402173913043478
Total size of assets downloaded: 31714215
Fraction of the size constituted by images: 0.39599558116131833
Mean size of images: 54132.28017241379
Median size of images: 35793.0


deccan.har
TTFB: 2571.717000042554
Page Load Time: 11313.86900000507
Total number of HTTP GET requests: 185
Different MIME types observed in the requests:
application/x-javascript
image/gif
application/javascript
text/html