### Pyhton advanced practice problems

##### https://www.w3resource.com/python-exercises/advanced/index.php

##### 1. Write a Python function that performs matrix multiplication using list comprehensions.

In [7]:
mat1 = [[1,2,3], [4,5,6], [7,8,9]]
mat2 = [[9,8,7], [6,5,4], [3,2,1]]

num_rows_mat1 = len(mat1)
num_cols_mat2 = len(mat2[0])

if num_rows_mat1 == num_cols_mat2:
    mat_mul = [[sum(mat1[i][k] * mat2[k][j] for k in range(len(mat2))) for j in range(num_cols_mat2)] 
           for i in range(num_rows_mat1)]
    print(mat_mul) 
else:
    print(f"No of rows of 1st matrix and no of cols of 2nd matrix must be same.")

[[30, 24, 18], [84, 69, 54], [138, 114, 90]]


##### 2. Write a Python program to create a class-based decorator that logs the execution time of methods.

In [28]:
import time

def time_execution(func):
        def wrapper(*args, **kwargs):
            start_time = time.time()
            func(*args, **kwargs)
            end_time = time.time()
            print(f"Function executed in {end_time - start_time}")
        return wrapper

class Example:
    def __init__(self):
        print(f"This is an example class for the time based calculations of methods execution")

    @time_execution
    # @LogExecutionTime
    def my_ex_func(self, *args, **kwargs):
        sum = 0
        for num in args:
            sum += num
        print(f"sum = {sum}")
        for k, v in kwargs.items():
            print(f"{k} : {v}")
        for i in range(100000000):
            pass

ex1 = Example()
print(type(ex1))
ex1.my_ex_func(1,2,3,4,5,6,name="Karan",city="Surat",age=20)

This is an example class for the time based calculations of methods execution
<class '__main__.Example'>
sum = 21
name : Karan
city : Surat
age : 20
Function executed in 4.636344909667969


In [1]:
# 2nd way
import time

class LogExecutionTime:
    def __init__(self, func): 
        self.func = func

    def __get__(self, instance, owner):  # Define the descriptor method to handle instance methods
        return lambda *args, **kwargs: self(instance, *args, **kwargs)  # Return a lambda that passes the instance

    def __call__(self, *args, **kwargs):  # Make the class instance callable
        instance = args[0]
        start_time = time.time()  
        result = self.func(instance, *args[1:], **kwargs)
        end_time = time.time() 
        execution_time = end_time - start_time
        print(f"Execution time of {self.func.__name__}: {execution_time:.4f} seconds")
        return result

class ExampleClass:
    @LogExecutionTime
    def example_method(self): 
        for _ in range(10000000):
            pass

example = ExampleClass()
example.example_method()


Execution time of example_method: 0.4417 seconds


##### 3. Implement a Multi-threaded Web Scraper that respects robots.txt rules

In [2]:
import requests  
from bs4 import BeautifulSoup  
from concurrent.futures import ThreadPoolExecutor  
import urllib.robotparser  
from urllib.parse import urlparse, urljoin  

In [10]:
def is_allowed(url, url_agent="*"):
    parsed_url = urlparse(url)
    base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
    robots_url = urljoin(base_url, "robots.txt")

    rp = urllib.robotparser.RobotFileParser(robots_url)
    rp.set_url(robots_url)
    rp.read()

    return rp.can_fetch(url_agent, url)

In [30]:
url = "https://books.toscrape.com/catalogue/category/books/travel_2/index.html"
print(is_allowed(url))

True


In [20]:
def fetch_page(url):
    if not is_allowed(url):
        print(f"Scraping not allowed for {url}")
        return None

    try:
        response = requests.get(url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, "html.parser")
            return soup
        else:
            print(f"Failed to fetch {url} with status code {response.status_code}")
    except Exception as e:
        print(f"Error occured: {e}")
    return None

In [32]:
soup = fetch_page(url)

In [27]:
def extract_links(soup, base_url):
    links = []
    if soup:
        for link in soup.find_all('a', href=True):
            full_url = urljoin(base_url, link['href'])
            links.append(full_url)
    return links

In [33]:
extract_links(soup, "https://books.toscrape.com")

['https://books.toscrape.com/index.html',
 'https://books.toscrape.com/index.html',
 'https://books.toscrape.com/books_1/index.html',
 'https://books.toscrape.com/books_1/index.html',
 'https://books.toscrape.com/index.html',
 'https://books.toscrape.com/mystery_3/index.html',
 'https://books.toscrape.com/historical-fiction_4/index.html',
 'https://books.toscrape.com/sequential-art_5/index.html',
 'https://books.toscrape.com/classics_6/index.html',
 'https://books.toscrape.com/philosophy_7/index.html',
 'https://books.toscrape.com/romance_8/index.html',
 'https://books.toscrape.com/womens-fiction_9/index.html',
 'https://books.toscrape.com/fiction_10/index.html',
 'https://books.toscrape.com/childrens_11/index.html',
 'https://books.toscrape.com/religion_12/index.html',
 'https://books.toscrape.com/nonfiction_13/index.html',
 'https://books.toscrape.com/music_14/index.html',
 'https://books.toscrape.com/default_15/index.html',
 'https://books.toscrape.com/science-fiction_16/index.html'

In [35]:
def scrape_urls(urls, max_workers=5):
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(fetch_page, url): url for url in urls}
        results = []
        for future in futures:
            result = future.result()
            if result:
                results.append(result)
        return results

In [41]:
def main():
    start_url = "https://books.toscrape.com/catalogue/category/books/travel_2/index.html"
    # start_url = "https://www.google.com"
    soup = fetch_page(start_url)
    if not soup:
        print(f"Can not scrape")
        return 
    links = extract_links(soup, start_url)
    pages = scrape_urls(links)

    for page in pages:
        if page:
            print(f"Title = {page.find('title').get_text()}")

In [42]:
main()

Title = 
    All products | Books to Scrape - Sandbox

Title = 
    All products | Books to Scrape - Sandbox

Title = 
    Books | 
     Books to Scrape - Sandbox


Title = 
    Books | 
     Books to Scrape - Sandbox


Title = 
    Travel | 
     Books to Scrape - Sandbox


Title = 
    Mystery | 
     Books to Scrape - Sandbox


Title = 
    Historical Fiction | 
     Books to Scrape - Sandbox


Title = 
    Sequential Art | 
     Books to Scrape - Sandbox


Title = 
    Classics | 
     Books to Scrape - Sandbox


Title = 
    Philosophy | 
     Books to Scrape - Sandbox


Title = 
    Romance | 
     Books to Scrape - Sandbox


Title = 
    Womens Fiction | 
     Books to Scrape - Sandbox


Title = 
    Fiction | 
     Books to Scrape - Sandbox


Title = 
    Childrens | 
     Books to Scrape - Sandbox


Title = 
    Religion | 
     Books to Scrape - Sandbox


Title = 
    Nonfiction | 
     Books to Scrape - Sandbox


Title = 
    Music | 
     Books to Scrape - Sandbox


Title =