## Matrix Multiplication in Python Using List Comprehensions

In [3]:
def matrix_multiplication(A, B):
    """
    Perform matrix multiplication of matrices A and B.

    Args:
    A: First matrix (list of lists), shape m x n.
    B: Second matrix (list of lists), shape n x p.

    Returns:
    Result of matrix multiplication (list of lists), shape m x p.
    An empty A yields an empty result.

    Raises:
    ValueError: If either matrix has rows of differing lengths, or if the
        number of columns in A does not equal the number of rows in B.
    """
    # An empty left operand produces no rows; this also avoids indexing A[0].
    if not A:
        return []

    inner = len(A[0])
    if any(len(row) != inner for row in A):
        raise ValueError("All rows of A must have the same length")
    if inner != len(B):
        raise ValueError("Number of columns in A must equal number of rows in B")
    if B and any(len(row) != len(B[0]) for row in B):
        raise ValueError("All rows of B must have the same length")

    # Transpose B once so every output cell is a plain dot product of a row
    # of A with a column of B. An empty B transposes to zero columns.
    columns_B = list(zip(*B))

    return [
        [sum(a * b for a, b in zip(row, column)) for column in columns_B]
        for row in A
    ]


# Demo: multiply a 2x3 matrix by a 3x2 matrix and print the 2x2 product.
A = [
    [1, 2, 3],
    [4, 5, 6],
]
B = [
    [7, 8],
    [9, 10],
    [11, 12],
]
print(matrix_multiplication(A, B))

[[58, 64], [139, 154]]


## Synthetic Data Generation Tool in Python

In [4]:
import pandas as pd
import numpy as np

class SyntheticDataGenerator:
    """Generate simple synthetic tabular data as pandas DataFrames.

    Each ``generate_*`` method REPLACES ``self.data`` with a fresh DataFrame;
    the result of an earlier call is not merged or kept.

    Attributes:
        num_rows: Number of rows in every generated DataFrame.
        num_columns: Number of columns for numeric and categorical data.
        data: The most recently generated DataFrame, or None before the
            first ``generate_*`` call.
    """

    # Defaults used only when the caller supplies no categories of their own.
    _DEFAULT_CATEGORIES = ('Category_A', 'Category_B', 'Category_C')
    _DEFAULT_WEIGHTS = (0.5, 0.3, 0.2)

    def __init__(self, num_rows, num_columns):
        self.num_rows = num_rows
        self.num_columns = num_columns
        self.data = None

    def _column_names(self):
        """Return the labels ``Column_1`` .. ``Column_<num_columns>``."""
        return [f"Column_{i}" for i in range(1, self.num_columns + 1)]

    def generate_numeric_data(self, min_value=0, max_value=100):
        """Fill ``self.data`` with random integers.

        Args:
            min_value: Lowest value that can be drawn (inclusive).
            max_value: Upper bound (EXCLUSIVE, as in ``np.random.randint``).
        """
        values = np.random.randint(
            min_value, max_value, size=(self.num_rows, self.num_columns)
        )
        self.data = pd.DataFrame(values, columns=self._column_names())

    def generate_categorical_data(self, categories=None, weights=None):
        """Fill ``self.data`` with randomly chosen category labels.

        Args:
            categories: Labels to draw from. Defaults to three built-in
                labels.
            weights: Probability of each label, same length as
                ``categories``. When omitted, the built-in labels use
                0.5/0.3/0.2 and caller-supplied labels are drawn uniformly.
        """
        if categories is None:
            categories = list(self._DEFAULT_CATEGORIES)
            # The built-in weights describe the built-in labels only; pairing
            # them with caller-supplied labels of another length would make
            # numpy reject the call.
            if weights is None:
                weights = list(self._DEFAULT_WEIGHTS)
        labels = np.random.choice(
            categories, size=(self.num_rows, self.num_columns), p=weights
        )
        self.data = pd.DataFrame(labels, columns=self._column_names())

    def generate_dates(self, start_date='2020-01-01', end_date='2021-12-31', format='%Y-%m-%d'):
        """Fill ``self.data`` with one ``Date`` column of evenly spaced timestamps.

        Args:
            start_date: First timestamp (anything ``pd.to_datetime`` accepts).
            end_date: Last timestamp (anything ``pd.to_datetime`` accepts).
            format: Currently unused; kept so existing callers keep working.
        """
        start_date = pd.to_datetime(start_date)
        end_date = pd.to_datetime(end_date)
        dates = pd.date_range(start=start_date, end=end_date, periods=self.num_rows)
        self.data = pd.DataFrame({'Date': dates})

    def save_data(self, filename='synthetic_data.csv'):
        """Write the most recently generated DataFrame to a CSV file.

        Args:
            filename: Destination path of the CSV file.

        Raises:
            RuntimeError: If no ``generate_*`` method has been called yet.
        """
        if self.data is None:
            raise RuntimeError("No data to save: call a generate_* method first")
        self.data.to_csv(filename, index=False)

# Example usage
if __name__ == "__main__":
    # Build a generator sized for 1000 rows and 5 columns.
    data_generator = SyntheticDataGenerator(num_rows=1000, num_columns=5)

    # Run the three generators with their default settings. Each call
    # reassigns data_generator.data, so only the frame produced by the last
    # call (the dates) is still held when the file is written below.
    data_generator.generate_numeric_data()
    data_generator.generate_categorical_data()
    data_generator.generate_dates()

    # Write whatever data_generator.data currently holds to disk as CSV.
    data_generator.save_data(filename='synthetic_data.csv')

In [None]:
import requests  # Import the requests module to handle HTTP requests
from bs4 import BeautifulSoup  # Import BeautifulSoup for parsing HTML
from concurrent.futures import ThreadPoolExecutor  # Import ThreadPoolExecutor for multi-threading
import urllib.robotparser  # Import robotparser to handle robots.txt rules
from urllib.parse import urlparse, urljoin  # Import urlparse and urljoin for URL manipulation

# Function to check if a URL is allowed to be scraped according to robots.txt
# Parsed robots.txt files keyed by robots.txt URL, so each site's rules are
# downloaded once instead of once per checked URL.
_ROBOTS_CACHE = {}


def is_allowed(url, user_agent='*'):
    """Return True if robots.txt of the URL's site permits fetching ``url``.

    Args:
        url: Absolute URL to check.
        user_agent: User-agent name to match against robots.txt rules.

    Returns:
        True when the site's robots.txt allows ``user_agent`` to fetch
        ``url``. False when it does not, when ``url`` is not an absolute
        http(s) URL, or when robots.txt could not be read.
    """
    parsed_url = urlparse(url)
    # Links such as mailto: or javascript: have no robots.txt and cannot be
    # fetched over HTTP, so reject them without touching the network.
    if parsed_url.scheme not in ('http', 'https') or not parsed_url.netloc:
        return False

    robots_url = f'{parsed_url.scheme}://{parsed_url.netloc}/robots.txt'

    rp = _ROBOTS_CACHE.get(robots_url)
    if rp is None:
        rp = urllib.robotparser.RobotFileParser()
        rp.set_url(robots_url)
        try:
            rp.read()
        except (OSError, ValueError) as e:
            # Network failure (URLError is an OSError) or undecodable file:
            # be conservative and treat the URL as not allowed. The failure
            # is not cached, so a later call retries the download.
            print(f'Could not read {robots_url}: {e}')
            return False
        _ROBOTS_CACHE[robots_url] = rp

    return rp.can_fetch(user_agent, url)

# Function to fetch and parse a webpage
def fetch_page(url, timeout=10):
    """Download ``url`` and parse it as HTML, honouring robots.txt.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, passed to
            ``requests.get``. Without it a stalled server would block the
            calling thread indefinitely.

    Returns:
        A BeautifulSoup object for the page, or None when scraping is not
        allowed, the response status is not 200, or any error occurs.
    """
    try:
        # The robots.txt check does network I/O as well, so it sits inside
        # the try: a failure there is reported like any other fetch error.
        if not is_allowed(url):
            print(f'Scraping not allowed for {url}')
            return None

        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            print(f'Successfully fetched {url}')
            return BeautifulSoup(response.content, 'html.parser')

        print(f'Failed to fetch {url} with status code {response.status_code}')
    except Exception as e:
        # Deliberately broad: this is a best-effort worker, and one bad URL
        # must be reported and skipped rather than crash the caller.
        print(f'Exception occurred while fetching {url}: {e}')
    return None

# Function to extract all links from a webpage
def extract_links(soup, base_url):
    """Collect the distinct http(s) links found in a parsed page.

    Args:
        soup: Parsed page exposing ``find_all`` (e.g. BeautifulSoup), or a
            falsy value such as None.
        base_url: URL the page was fetched from; relative hrefs are resolved
            against it.

    Returns:
        List of absolute URLs in first-seen order, without duplicates and
        without ``#fragment`` parts. Links with a non-http(s) scheme
        (``mailto:``, ``javascript:`` ...) are skipped. An empty list is
        returned when ``soup`` is falsy.
    """
    if not soup:
        return []

    links = []
    seen = set()
    for anchor in soup.find_all('a', href=True):
        try:
            parts = urlparse(urljoin(base_url, anchor['href']))
        except ValueError:
            # Malformed href (e.g. a broken IPv6 literal): skip it.
            continue
        if parts.scheme not in ('http', 'https'):
            continue
        # Fragments address a position inside the same document, so URLs
        # that differ only by fragment would fetch the same page twice.
        full_url = parts._replace(fragment='').geturl()
        if full_url not in seen:
            seen.add(full_url)
            links.append(full_url)
    return links

# Function to scrape a list of URLs using multiple threads
def scrape_urls(urls, max_workers=5):
    """Fetch several URLs concurrently with a thread pool.

    Args:
        urls: Iterable of URLs to pass to ``fetch_page``.
        max_workers: Maximum number of worker threads.

    Returns:
        List of the truthy results returned by ``fetch_page``, in the same
        order as ``urls``. URLs whose fetch returned nothing or raised are
        left out.
    """
    results = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Keep each URL next to its future so a failure can be reported.
        submitted = [(url, executor.submit(fetch_page, url)) for url in urls]
        for url, future in submitted:
            try:
                page = future.result()
            except Exception as e:
                # One failing URL must not discard the pages already fetched.
                print(f'Exception occurred while scraping {url}: {e}')
                continue
            if page:
                results.append(page)
    return results

# Main function to start the web scraper
def main():
    """Fetch the start page, scrape every page it links to, print the titles."""
    start_url = 'https://books.toscrape.com/'  # Replace with the URL you want to start scraping from
    soup = fetch_page(start_url)
    if not soup:
        return

    # Extract links from the start page, then fetch them concurrently
    links = extract_links(soup, start_url)
    pages = scrape_urls(links)

    # Example processing step: print the title of each scraped page
    for page in pages:
        if not page:
            continue
        title_tag = page.find('title')
        # find() returns None when the page has no <title>; calling
        # get_text() on that would raise AttributeError.
        if title_tag is None:
            print('Page has no <title> element')
            continue
        print(f'Page title: {title_tag.get_text()}')


if __name__ == '__main__':
    main()