In [33]:
import requests
import threading
import os
import datetime
import glob

In [None]:
# Using multiThreading (available in now)

In [34]:
url_input = input("Enter the URL: ")
url = url_input.split('?')[0] if url_input.count('?') > 1 else url_input
num_threads = 12
print(url) # Number of threads for concurrent downloads

http://ipv4.download.thinkbroadband.com/512MB.zip


In [35]:
# Step 1: Download the Large File with Split Downloads
def download_chunk(start, end, url, chunk_number):
    headers = {'Range': f'bytes={start}-{end}'}
    response = requests.get(url, headers=headers, stream=True)

    with open(f'chunk_{chunk_number}', 'wb') as file:
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:
                file.write(chunk)
                print(chunk)

In [36]:
response = requests.head(url)
file_size = int(response.headers['content-length'])
chunk_size = file_size // num_threads
threads = []

In [None]:
for i in range(num_threads):
    start = i * chunk_size
    end = min((i + 1) * chunk_size - 1, file_size - 1)
    thread = threading.Thread(target=download_chunk, args=(start, end, url, i))
    threads.append(thread)
    thread.start()

for thread in threads:
    thread.join()

In [None]:
# Step 2: Combine the Files
def combine_files(chunk_prefix, output_file):
    with open(output_file, 'wb') as output_f:
        chunk_number = 0
        while True:
            try:
                with open(f'{chunk_prefix}_{chunk_number}', 'rb') as chunk_f:
                    chunk = chunk_f.read()
                    if not chunk:
                        break
                    output_f.write(chunk)
                chunk_number += 1
            except FileNotFoundError:
                break

time_now = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
chunk_prefix = 'chunk'  # Prefix used while splitting
output_file = f'file_{time_now}_.zip'  # Specify the desired output file name

combine_files(chunk_prefix, output_file)

In [None]:
# Step 3: Clean Up Split Files
def clean_up_chunks(chunk_prefix):
    for file_name in glob.glob(f'{chunk_prefix}_*'):
        os.remove(file_name)

clean_up_chunks(chunk_prefix)  # Clean up files with prefix 'chunk'

In [None]:
# Using ThreadPoolExecutor (Concurrency)
# Version 2

In [14]:
import requests
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed
import os
import datetime

In [15]:
url_input = input("Enter the URL: ")
url = url_input.split('?')[0] if url_input.count('?') > 1 else url_input
num_threads = 12
print(url) # Number of threads for concurrent downloads

http://ipv4.download.thinkbroadband.com/512MB.zip


In [16]:
def download_chunk(start, end, url, chunk_number):
    headers = {'Range': f'bytes={start}-{end}'}
    response = requests.get(url, headers=headers, stream=True)

    with open(f'chunk_{chunk_number}', 'wb') as file:
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:
                file.write(chunk)

In [17]:
response = requests.head(url)
file_size = int(response.headers['content-length'])

chunk_size = file_size // num_threads

with ThreadPoolExecutor(max_workers=num_threads) as executor:
    # Create a list of futures for each thread and wait for them to finish before continuing.
    task = []
    for i in range(num_threads):
        start = i * chunk_size
        end = start + chunk_size
        task.append(executor.submit(download_chunk, start , end, url , i))

    for future in as_completed(task):
        future.result()
        print(future.result())
        print(future.exception())

    print('Done')

None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
Done


In [18]:
# Step 2: Combine the Files
def combine_files(chunk_prefix, output_file):
    with open(output_file, 'wb') as output_f:
        chunk_number = 0
        while True:
            try:
                with open(f'{chunk_prefix}_{chunk_number}', 'rb') as chunk_f:
                    chunk = chunk_f.read(1024)  # Read in 1KB chunks
                    while chunk:
                        output_f.write(chunk)
                        chunk = chunk_f.read(1024)
                chunk_number += 1
            except FileNotFoundError:
                break

In [19]:
filename_ = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
chunk_prefix = 'chunk'  # Prefix used while splitting
output_file = f'file_{filename_}_.zip'  # Specify the desired output file name

combine_files(chunk_prefix, output_file)

In [20]:
# Step 3: Clean Up Split Files
def clean_up_chunks(chunk_prefix):
    for file_name in os.listdir():
        if file_name.startswith(chunk_prefix):
            os.remove(file_name)

clean_up_chunks(chunk_prefix)  # Clean up files with prefix 'chunk'