### 1. Write a function that takes a list of URLs as input and returns the ratio of products with null barcodes.

In [13]:
"""
This script contains the functions to calculate the ratio of products with null barcodes from a sample of product URLs.
1. The load_urls function loads the product URLs either from a file or via the get_urls function.
2. The process_urls function processes a subset of URLs to count products with null barcodes.
3. The check_barcode_null_ratio function calculates the ratio of products with null barcodes from a sample of product URLs.
"""

import os
from url_extractor import get_urls
from decorators import timer
import scraper
import pandas as pd
from concurrent.futures import ThreadPoolExecutor


def load_urls() -> list[str]:
    """
    Load product URLs either from a file or via the get_urls function.

    If a file named 'urls.txt' exists in the 'tests/files/' directory, the function will read the URLs from this file.
    Otherwise, it will call the get_urls function to retrieve the URLs.

    :return: a list of urls
    """
    target_path = os.path.join(os.getcwd(), 'tests/files/urls.txt')
    if os.path.exists(target_path):
        with open(target_path, 'r') as f:
            urls = [line.strip() for line in f.readlines()]
        return urls
    else:
        urls_dict = get_urls()
        return urls_dict['urls']


def process_urls(subset_urls, scraper_obj) -> tuple[int, list[dict]]:
    """
    Process a subset of URLs to count products with null barcodes.

    :param subset_urls: A subset of product URLs to process.
    :param scraper_obj: An instance of the Scraper class to use for extracting product details.
    :return: Count of products with null barcodes in the given subset of URLs and the list of product details.
    """
    null_count = 0
    product_details_subset_list = []
    for url in subset_urls:
        product_details = scraper_obj.scrape_details(url)
        product_details_subset_list.append(product_details)
        barcode = product_details.get('barcode')
        if barcode is None:
            null_count += 1
    return null_count, product_details_subset_list


@timer
def check_barcode_null_ratio(sample_size=250) -> tuple[float, int, pd.DataFrame]:
    """
    Calculate the ratio of products with null barcodes from a sample of product URLs.

    :param sample_size: Number of URLs to process. Defaults to 250.
    :return:  Ratio of products with null barcodes, rounded to three decimal places.
    """
    # Load the URLs
    urls = load_urls()
    # adjust the sample size if necessary
    if len(urls) < sample_size:
        sample_size = len(urls)

    # Split the URLs for multi-threading
    num_threads = 20  # Choose a suitable number of threads
    chunk_size = len(urls) // num_threads
    url_chunks = [urls[i:i + chunk_size] for i in range(0, len(urls), chunk_size)]

    # Initialize a scraper object for each thread
    scraper_objects = [scraper.Scraper() for _ in range(num_threads)]

    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        # Use zip to pair each url_chunk with its corresponding scraper object
        results = list(executor.map(process_urls, url_chunks, scraper_objects))
        
        # Unpack the results
        total_null_count = sum(results[i][0] for i in range(len(results)))
        product_details_list = [product_details for result in results for product_details in result[1]]

    # Close each scraper object after processing
    for scraper_obj in scraper_objects:
        scraper_obj.close()

    ratio = total_null_count / sample_size
    product_details_df = pd.DataFrame(product_details_list)
    return ratio.__round__(3), sample_size, product_details_df

### 2. Call the function from step 1 and print the null ratio, sample size, and the time it took to run check_barcode_null_ratio function.

In [14]:
null_ratio, sample_size, df = check_barcode_null_ratio()
print(f'Null ratio: {null_ratio.__round__(3)}')
print(f'sample size: {sample_size}')

check_barcode_null_ratio took 328.8042070865631 seconds
Null ratio: 0.188
sample size: 250


### 3. Save the product details to a CSV file and print out the dataframe.

In [15]:
df.to_csv('product_details.csv', index=False)
df

Unnamed: 0,url,title,description,price,attributes,barcode,images
0,https://www.trendyol.com/sail-lakers/beyaz-der...,Sail Lakers - Beyaz Deri Bağcıklı Erkek Günlük...,Bu ürün Sail Lakers tarafından gönderilecektir...,1.390 TL,"{'Materyal': 'Rugan', 'Topuk Boyu': 'Kısa Topu...",1000547780,[https://cdn.dsmcdn.com/ty996/product/media/im...
1,https://www.trendyol.com/gelincik/kahvaltilik-...,Gelincik Kahvaltılık Sürülebilir Doğal Katkısı...,Bu ürün GELİNCİK HAŞHAŞ tarafından gönderilece...,"1.933,90 TL",{},,[https://cdn.dsmcdn.com/ty312/product/media/im...
2,https://www.trendyol.com/zeyder-kids/kiz-bebek...,Zeyder Kids Kız Bebek Baskılı T-shirt ZEYDER377,Bu ürün Zeyder Kids tarafından gönderilecektir...,175 TL,"{'Kalıp': 'Standart', 'Materyal': 'Pamuklu', '...",ZEYDER0002052,[https://cdn.dsmcdn.com/ty934/product/media/im...
3,https://www.trendyol.com/nuclear/bubba-juice-1...,Nuclear Bubba Juice 10 ml Mix Aroma Kapruz Sak...,Bu ürün Nuclear Kimya tarafından gönderilecekt...,"77,99 TL",{},nk8014,[https://cdn.dsmcdn.com/ty934/product/media/im...
4,https://www.trendyol.com/midday/2-adet-evinize...,Midday 2 Adet Evinize Şömine Havasını Yaşataca...,Bu ürün Midday tarafından gönderilecektir.\nKa...,"399,90 TL","{'Duy Tipi': 'E27', 'Ampul Teknolojisi': 'Mum ...",orhan723a,[https://cdn.dsmcdn.com/ty692/product/media/im...
...,...,...,...,...,...,...,...
235,https://www.trendyol.com/homamia/isme-ozel-tek...,Homamia Isme Özel Tek Kişilik Mikro Saten Nevr...,Bu ürün Homamia tarafından gönderilecektir.\nK...,"789,89 TL","{'Materyal': 'Pamuklu', 'Çarşaf Türü': 'Çarşaf...",,[https://cdn.dsmcdn.com/ty789/product/media/im...
236,https://www.trendyol.com/michael-polo/siyah-4-...,mıchael polo Siyah 4 Bölmeli Fermuarlı Kapamal...,Bu ürün Gold Yıldız tarafından gönderilecektir...,"512,25 TL","{'Materyal': 'Suni Deri', 'Boyut': 'Orta', 'Re...",,[https://cdn.dsmcdn.com/ty988/product/media/im...
237,https://www.trendyol.com/avessa/havlu-bileklik...,Avessa Havlu Bileklik Siyah Hb-269 HB-269,Bu ürün Aplus tarafından gönderilecektir.\nKam...,"47,99 TL",{'Renk': 'Beyaz'},8690000762460,[https://cdn.dsmcdn.com/ty97/product/media/ima...
238,https://www.trendyol.com/aypeteks/koyu-mavi-ke...,AYPETEKS Koyu Mavi Keten Kutu Model Koltuk Sır...,Bu ürün AYPETEKS tarafından gönderilecektir.\n...,"215,63 TL","{'Materyal': 'Keten', 'Parça Sayısı': '1', 'Re...",,[https://cdn.dsmcdn.com/ty980/product/media/im...
