In [None]:
#!pip install adblockparser



In [30]:
from adblockparser import AdblockRules
import json
from urllib.parse import urlparse

def read_harfile(harfile_path, rules):
    """Count the HAR entries that fall into each of three blocking categories.

    Args:
        harfile_path: Path to a UTF-8 encoded HAR (JSON) file.
        rules: Object exposing ``should_block(url, options)``; the option
            dicts below are passed to it unchanged.

    Returns:
        A dict with integer counters under the keys ``'a'``, ``'b'`` and
        ``'c'``.  Each entry increments at most one counter:

        * ``'a'`` - ``should_block(url, {'third-party': True})`` is true.
        * ``'b'`` - otherwise, the URL's netloc contains
          ``scorecardresearch.com``, the response mimeType starts with
          ``image`` and ``should_block(url, {'domain': True, 'image': True})``
          is true.
        * ``'c'`` - otherwise, the URL's netloc contains ``doubleclick.net``,
          the response mimeType starts with ``text/javascript`` and
          ``should_block(url, {'script': True})`` is true.

    Raises:
        KeyError: If the file lacks ``log.entries`` or an entry lacks
            ``request.url`` / ``response.content.mimeType``.
    """
    # Use a context manager so the file handle is closed even if parsing fails.
    with open(harfile_path, encoding="utf-8") as harfile:
        harfile_json = json.load(harfile)

    requests_count = {'a': 0, 'b': 0, 'c': 0}

    # Iterate through the entries in the HAR file
    for entry in harfile_json['log']['entries']:
        url = entry['request']['url']
        content_type = entry['response']['content']['mimeType']

        if rules.should_block(url, {'third-party': True}):
            requests_count['a'] += 1
            continue

        # Parse the host once; both remaining categories key off of it.
        host = urlparse(url).netloc

        if 'scorecardresearch.com' in host and content_type.startswith('image'):
            if rules.should_block(url, {'domain': True, 'image': True}):
                requests_count['b'] += 1
        elif 'doubleclick.net' in host and content_type.startswith('text/javascript'):
            if rules.should_block(url, {'script': True}):
                requests_count['c'] += 1

    return requests_count

# Adblock Plus filter rules, keyed by the category letter used in the report.
filter_rules = {
    'a': ['*cookiesync?*'],
    'b': ['||scorecardresearch.com^$image'],
    'c': ['||doubleclick.net^$script'],
}

# Build a single parser from every rule, in category order a, b, c.
rules = AdblockRules(
    [pattern for category in ('a', 'b', 'c') for pattern in filter_rules[category]]
)

# Count the blocked requests recorded in the HAR capture.
harfile_path = 'www.cnn.com.har'
blocked_requests_count = read_harfile(harfile_path, rules)

# Report one line per category: (label, counter key).
report_lines = (
    ("Block any request containing ‘cookiesync?’ string:", 'a'),
    ("Block any image (e.g., jpg, gif etc.) loading from scorecardresearch.com:", 'b'),
    ("Block any script loading from doubleclick.net:", 'c'),
)

print('# of HTTP requests blocked:')
for label, category in report_lines:
    print(label, blocked_requests_count[category])


# of HTTP requests blocked:
Block any request containing ‘cookiesync?’ string: 2
Block any image (e.g., jpg, gif etc.) loading from scorecardresearch.com: 3
Block any script loading from doubleclick.net: 3
