In [51]:
# letter frequency using single thread in sequential default manner.
import json
import urllib.request
import time


def count_letters(url, frequency):
    response = urllib.request.urlopen(url)
    txt = str(response.read())
    for l in txt:
        letter = l.lower()
        if letter in frequency:
            frequency[letter] += 1


def main():
    frequency = {}
    for c in "abcdefghijklmnopqrstuvwxyz":
        frequency[c] = 0
    start = time.time()
    for i in range(1000, 1020):
        count_letters(f"https://www.rfc-editor.org/rfc/rfc{i}.txt", frequency)
    end = time.time()
    print(json.dumps(frequency, indent=4))
    print("Done, time taken", end - start)


main()

{
    "a": 80014,
    "b": 16998,
    "c": 48003,
    "d": 40501,
    "e": 140093,
    "f": 26074,
    "g": 19010,
    "h": 36316,
    "i": 79913,
    "j": 2170,
    "k": 6614,
    "l": 38305,
    "m": 31176,
    "n": 135371,
    "o": 84258,
    "p": 32270,
    "q": 2835,
    "r": 75326,
    "s": 79790,
    "t": 103557,
    "u": 27572,
    "v": 10580,
    "w": 14195,
    "x": 4719,
    "y": 13914,
    "z": 1115
}
Done, time taken 46.20524001121521


In [57]:
# letter frequency using n multiple threads where n is the number of urls ie 20 in this case without LOCK

import json
import urllib.request
import time
from threading import Thread



def count_letters(url, frequency):
    response = urllib.request.urlopen(url)
    txt = str(response.read())
    
    for l in txt:
        letter = l.lower()
        if letter in frequency:
            frequency[letter] += 1


def main():
    frequency = {}
    for c in "abcdefghijklmnopqrstuvwxyz":
        frequency[c] = 0
    start = time.time()
    for i in range(1000, 1020):
        a=Thread(target=count_letters, args=(f"https://www.rfc-editor.org/rfc/rfc{i}.txt", frequency)).start()
    time.sleep(10)
    end = time.time()
    print(json.dumps(frequency, indent=4))
    print("Done, time taken", end - start)

main()

#and the time in this is not the correct time. As we used sleep for 10 sec.

#Note that the results are not same as the single thread output. This is because of the race condition.
#To solve this race condition we use mutex

{
    "a": 80014,
    "b": 16998,
    "c": 48003,
    "d": 40501,
    "e": 138470,
    "f": 26074,
    "g": 19010,
    "h": 36316,
    "i": 79913,
    "j": 2170,
    "k": 6614,
    "l": 38305,
    "m": 31176,
    "n": 135371,
    "o": 84258,
    "p": 32270,
    "q": 2835,
    "r": 75326,
    "s": 77546,
    "t": 103557,
    "u": 27572,
    "v": 10580,
    "w": 14195,
    "x": 4719,
    "y": 13914,
    "z": 1115
}
Done, time taken 10.13707685470581


In [50]:
# letter frequency using n multiple threads where n is the number of urls ie 20 in this case with LOCK
import json
import urllib.request
import time
from threading import Thread, Lock

finished_count = 0


def count_letters(url, frequency, mutex):
    response = urllib.request.urlopen(url)
    txt = str(response.read())
    mutex.acquire()
    for l in txt:
        letter = l.lower()
        if letter in frequency:
            frequency[letter] += 1
    global finished_count
    finished_count += 1
    mutex.release()


def main():
    frequency = {}
    mutex = Lock()
    for c in "abcdefghijklmnopqrstuvwxyz":
        frequency[c] = 0
    start = time.time()
    for i in range(1000, 1020):
        Thread(target=count_letters, args=(f"https://www.rfc-editor.org/rfc/rfc{i}.txt", frequency, mutex)).start()
    while True:
        mutex.acquire()
        if finished_count == 20:
            break
        mutex.release()
        time.sleep(0.5)
    end = time.time()
    print(json.dumps(frequency, indent=4))
    print("Done, time taken", end - start)


main()

{
    "a": 80014,
    "b": 16998,
    "c": 48003,
    "d": 40501,
    "e": 140093,
    "f": 26074,
    "g": 19010,
    "h": 36316,
    "i": 79913,
    "j": 2170,
    "k": 6614,
    "l": 38305,
    "m": 31176,
    "n": 135371,
    "o": 84258,
    "p": 32270,
    "q": 2835,
    "r": 75326,
    "s": 79790,
    "t": 103557,
    "u": 27572,
    "v": 10580,
    "w": 14195,
    "x": 4719,
    "y": 13914,
    "z": 1115
}
Done, time taken 7.2098917961120605
