<a href="https://colab.research.google.com/github/SabirDivs/Python-Scripts/blob/main/DownloadWebsiteAssets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install requests beautifulsoup4



In [9]:
import os
import re
import sys
import requests
from urllib.parse import urlparse, urljoin
from bs4 import BeautifulSoup
import mimetypes

class WebsiteDownloader:
    def __init__(self, base_url, output_dir):
        self.base_url = base_url
        self.output_dir = os.path.abspath(output_dir)
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Connection': 'keep-alive',
        })
        self.visited_urls = set()
        self.asset_map = {}
        self.base_domain = urlparse(base_url).netloc

    def sanitize_path(self, path):
        """Create safe filesystem paths from URLs"""
        path = re.sub(r'[^\w\-_\.]', '_', path)
        return path.lstrip('/')

    def get_resource_path(self, url, content_type=None):
        """Generate local path for a resource"""
        parsed = urlparse(url)
        path = parsed.path or '/index.html'

        if path.endswith('/'):
            path += 'index.html'

        # Determine filename with extension
        filename = os.path.basename(path)
        if '.' not in filename or '.' in urlparse(url).path.split('/')[-1][:5]:
            if content_type:
                ext = mimetypes.guess_extension(content_type.split(';')[0]) or ''
                filename = f"{filename}{ext}" if filename else f"resource{ext}"
            else:
                filename = filename or "resource"

        domain_path = self.sanitize_path(parsed.netloc)
        dir_path = os.path.dirname(path)
        if dir_path != '/':
            safe_dir = self.sanitize_path(dir_path)
            resource_path = os.path.join(domain_path, safe_dir, filename)
        else:
            resource_path = os.path.join(domain_path, filename)

        return resource_path

    def download_resource(self, url, referer=None):
        """Download resource and return its local path"""
        if url in self.asset_map:
            return self.asset_map[url]

        try:
            # Handle relative URLs
            if not urlparse(url).scheme:
                url = urljoin(self.base_url, url)

            headers = {'Referer': referer} if referer else {}
            response = self.session.get(url, headers=headers, timeout=10, stream=True)
            response.raise_for_status()

            # Determine content type
            content_type = response.headers.get('Content-Type', '').split(';')[0]
            if not content_type:
                content_type = mimetypes.guess_type(url)[0] or 'application/octet-stream'

            # Generate local path
            local_relative_path = self.get_resource_path(url, content_type)
            local_path = os.path.join(self.output_dir, local_relative_path)

            # Ensure directory exists
            os.makedirs(os.path.dirname(local_path), exist_ok=True)

            # Save content
            with open(local_path, 'wb') as f:
                for chunk in response.iter_content(8192):
                    f.write(chunk)

            self.asset_map[url] = local_relative_path
            print(f"Downloaded: {url} => {local_relative_path}")
            return local_relative_path

        except Exception as e:
            print(f"Failed to download {url}: {str(e)}")
            return None

    def process_css(self, content, css_url):
        """Process CSS content and download nested resources"""
        def replace_url(match):
            url = match.group(1).strip('\'"')
            if url.startswith(('data:', 'http:', 'https:')):
                return match.group(0)

            abs_url = urljoin(css_url, url)
            local_path = self.download_resource(abs_url, referer=css_url)
            return f"url('{local_path}')" if local_path else match.group(0)

        return re.sub(r'url\([\'"]?(.*?)[\'"]?\)', replace_url, content)

    def process_html(self, content, page_url):
        """Process HTML content and download linked resources"""
        try:
            soup = BeautifulSoup(content, 'html.parser')
        except:
            soup = BeautifulSoup(content, 'html.parser')

        # Update resource tags
        for tag, attr in [
            ('link', 'href'),
            ('script', 'src'),
            ('img', 'src'),
            ('source', 'src'),
            ('audio', 'src'),
            ('video', 'src'),
            ('iframe', 'src'),
            ('embed', 'src'),
            ('object', 'data'),
            ('meta', 'content'),
            ('img', 'srcset')
        ]:
            for element in soup.find_all(tag, **{attr: True}):
                urls = []
                if attr == 'srcset':
                    # Handle srcset with multiple URLs
                    srcset = element[attr]
                    parts = [p.strip() for p in srcset.split(',') if p.strip()]
                    for part in parts:
                        if ' ' in part:
                            url_part = part.split(' ', 1)[0]
                        else:
                            url_part = part
                        if not url_part.startswith(('data:', 'javascript:', 'mailto:')):
                            abs_url = urljoin(page_url, url_part)
                            local_path = self.download_resource(abs_url, referer=page_url)
                            if local_path:
                                part = part.replace(url_part, local_path)
                        urls.append(part)
                    element[attr] = ', '.join(urls)
                else:
                    url = element.get(attr)
                    if url and not url.startswith(('data:', 'javascript:', 'mailto:')):
                        abs_url = urljoin(page_url, url)
                        local_path = self.download_resource(abs_url, referer=page_url)
                        if local_path:
                            element[attr] = local_path

        # Process inline styles
        for element in soup.find_all(style=True):
            style = element['style']
            style = re.sub(
                r'url\([\'"]?(.*?)[\'"]?\)',
                lambda m: f"url('{self.download_resource(urljoin(page_url, m.group(1)), page_url)}')"
                          if not m.group(1).startswith('data:')
                          else m.group(0),
                style
            )
            element['style'] = style

        # Process CSS in style tags
        for style_tag in soup.find_all('style'):
            if style_tag.string:
                style_tag.string = self.process_css(style_tag.string, page_url)

        # Update links to other pages
        for a_tag in soup.find_all('a', href=True):
            href = a_tag['href']
            if href and not href.startswith(('#', 'javascript:', 'mailto:')):
                abs_url = urljoin(page_url, href)
                if urlparse(abs_url).netloc == self.base_domain:
                    a_tag['href'] = self.download_page(abs_url) or href

        # Update form actions
        for form_tag in soup.find_all('form', action=True):
            action = form_tag['action']
            if action and not action.startswith(('javascript:', 'mailto:')):
                abs_url = urljoin(page_url, action)
                if urlparse(abs_url).netloc == self.base_domain:
                    form_tag['action'] = self.download_page(abs_url) or action

        return str(soup)

    def download_page(self, url):
        """Download and process a single HTML page"""
        if url in self.visited_urls:
            return self.asset_map.get(url)

        self.visited_urls.add(url)
        print(f"Processing page: {url}")

        try:
            # Handle relative URLs
            if not urlparse(url).scheme:
                url = urljoin(self.base_url, url)

            response = self.session.get(url, timeout=10)
            response.raise_for_status()

            content_type = response.headers.get('Content-Type', '').split(';')[0]
            if not content_type or 'text/html' not in content_type:
                return None

            # Process HTML content
            processed_html = self.process_html(response.content, url)

            # Save HTML
            local_relative_path = self.get_resource_path(url, 'text/html')
            local_path = os.path.join(self.output_dir, local_relative_path)
            os.makedirs(os.path.dirname(local_path), exist_ok=True)

            with open(local_path, 'w', encoding='utf-8') as f:
                f.write(processed_html)

            self.asset_map[url] = local_relative_path
            return local_relative_path

        except Exception as e:
            print(f"Failed to process page {url}: {str(e)}")
            return None

    def start(self):
        """Begin the download process"""
        self.download_page(self.base_url)
        print(f"\nWebsite successfully downloaded to: {self.output_dir}")


# Colab-compatible execution
if __name__ == "__main__":
    # Check if running in Colab
    if 'google.colab' in sys.modules:
        print("Running in Google Colab")
        base_url = input("Enter website URL to download: ").strip()
        output_dir = input("Enter output directory [downloaded_website]: ").strip() or "downloaded_website"
    else:
        # Command-line execution
        import argparse
        parser = argparse.ArgumentParser(description='Download website assets for offline use.')
        parser.add_argument('url', help='URL of the website to download')
        parser.add_argument('-o', '--output', default='downloaded_website',
                            help='Output directory (default: downloaded_website)')
        args = parser.parse_args()
        base_url = args.url
        output_dir = args.output

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    # Start downloader
    downloader = WebsiteDownloader(base_url, output_dir)
    downloader.start()

Running in Google Colab
Enter website URL to download: https://theolivebranchpk.com/
Enter output directory [downloaded_website]: /content/drive/MyDrive/PythonApps/WebsitesDownloader
Processing page: https://theolivebranchpk.com/
Downloaded: https://theolivebranchpk.com/images/logos/theOliveBranchPK-SVG.png => theolivebranchpk.com/_images_logos/theOliveBranchPK-SVG.png
Downloaded: https://theolivebranchpk.com/ => theolivebranchpk.com/index.html
Downloaded: https://theolivebranchpk.com/fonts/fontawesome-webfont.woff2 => theolivebranchpk.com/_fonts/fontawesome-webfont.woff2
Downloaded: https://theolivebranchpk.com/css/assets.min.css => theolivebranchpk.com/_css/assets.min.css
Downloaded: https://theolivebranchpk.com/css/fonts.css => theolivebranchpk.com/_css/fonts.css
Downloaded: https://theolivebranchpk.com/css/styles.css => theolivebranchpk.com/_css/styles.css
Downloaded: https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.6.0/css/all.min.css => cdnjs.cloudflare.com/_ajax_libs_font-a

In [15]:
pip install playwright

Collecting playwright
  Using cached playwright-1.53.0-py3-none-manylinux1_x86_64.whl.metadata (3.5 kB)
Collecting pyee<14,>=13 (from playwright)
  Downloading pyee-13.0.0-py3-none-any.whl.metadata (2.9 kB)
Downloading playwright-1.53.0-py3-none-manylinux1_x86_64.whl (45.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.8/45.8 MB[0m [31m24.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyee-13.0.0-py3-none-any.whl (15 kB)
Installing collected packages: pyee, playwright
Successfully installed playwright-1.53.0 pyee-13.0.0


In [18]:
import os
import re
import sys
import json
import time
import mimetypes
import hashlib
import argparse
from urllib.parse import urlparse, urljoin
from pathlib import Path
from playwright.sync_api import sync_playwright
import requests
from bs4 import BeautifulSoup

class AdvancedWebsiteDownloader:
    def __init__(self, base_url, output_dir, depth=1, delay=1.0, max_retries=3):
        self.base_url = base_url
        parsed_url = urlparse(base_url)
        self.base_domain = parsed_url.netloc
        self.scheme = parsed_url.scheme or 'https'
        self.output_dir = Path(output_dir).resolve()
        self.depth = depth
        self.delay = delay
        self.max_retries = max_retries
        self.visited_urls = set()
        self.resource_map = {}
        self.cookies = {}
        self.user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36"
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': self.user_agent,
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Connection': 'keep-alive'
        })
        mimetypes.init()

    def sanitize_path(self, path):
        """Create filesystem-safe paths from URLs with directory structure"""
        if not path or path == '/':
            return 'index.html'

        path = re.sub(r'[^\w\-_\.\/]', '_', path)
        path = path.strip('/')

        # Handle extension-less paths
        if '.' not in os.path.basename(path):
            path += '.html'

        return path

    def get_local_path(self, url, content_type=None):
        """Generate local path while preserving directory structure"""
        parsed = urlparse(url)
        path = parsed.path or '/index.html'

        # Handle root path
        if path == '/':
            return 'index.html'

        # Handle directory paths
        if path.endswith('/'):
            path += 'index.html'

        # Get filename
        filename = os.path.basename(path)
        dir_path = os.path.dirname(path)

        # Add extension if missing
        if '.' not in filename:
            if content_type:
                ext = mimetypes.guess_extension(content_type.split(';')[0]) or '.bin'
                filename += ext
            else:
                # Try to guess from URL
                ext = mimetypes.guess_extension(parsed.path.split('/')[-1]) or '.bin'
                filename += ext

        # Create domain directory
        domain_dir = re.sub(r'[^\w\-_\.]', '_', self.base_domain)

        # Combine paths
        return os.path.join(domain_dir, self.sanitize_path(dir_path), filename)

    def download_resource(self, url, referer=None, is_retry=False):
        """Download resource with retry logic and proper content handling"""
        if url in self.resource_map:
            return self.resource_map[url]

        # Skip data URLs
        if url.startswith(('data:', 'javascript:', 'mailto:')):
            return None

        # Handle relative URLs
        if not urlparse(url).scheme:
            url = urljoin(f"{self.scheme}://{self.base_domain}", url)

        try:
            headers = {'Referer': referer} if referer else {}
            response = self.session.get(url, headers=headers, timeout=15, stream=True)
            response.raise_for_status()

            # Get content type
            content_type = response.headers.get('Content-Type', '').split(';')[0]
            if not content_type:
                content_type = mimetypes.guess_type(url)[0] or 'application/octet-stream'

            # Get local path
            local_relative_path = self.get_local_path(url, content_type)
            local_path = self.output_dir / local_relative_path

            # Create directories
            local_path.parent.mkdir(parents=True, exist_ok=True)

            # Handle text-based resources
            if 'text/' in content_type or 'application/' in content_type:
                content = response.content.decode('utf-8', errors='replace')

                # Process based on content type
                if 'css' in content_type:
                    content = self.process_css(content, url)
                elif 'html' in content_type:
                    content = self.process_html(content, url)
                elif 'javascript' in content_type:
                    content = self.process_js(content, url)

                with open(local_path, 'w', encoding='utf-8') as f:
                    f.write(content)
            else:
                # Binary files
                with open(local_path, 'wb') as f:
                    for chunk in response.iter_content(8192):
                        f.write(chunk)

            print(f"Downloaded: {url} => {local_relative_path}")
            self.resource_map[url] = local_relative_path
            return local_relative_path

        except Exception as e:
            if not is_retry and self.max_retries > 0:
                print(f"Retrying ({self.max_retries} left): {url}")
                self.max_retries -= 1
                time.sleep(1)
                return self.download_resource(url, referer, is_retry=True)
            print(f"Failed to download {url}: {str(e)}")
            return None

    def process_css(self, content, css_url):
        """Deep CSS processing including @import rules and nested URLs"""
        # Process @import rules first
        content = re.sub(
            r'@import\s+(url\()?[\'"](.*?)[\'"]\)?;',
            lambda m: self.process_css_import(m, css_url),
            content,
            flags=re.IGNORECASE
        )

        # Process all url() references
        return re.sub(
            r'url\([\'"]?(.*?)[\'"]?\)',
            lambda m: self.process_css_url(m, css_url),
            content
        )

    def process_css_import(self, match, css_url):
        """Process CSS @import rules by downloading imported stylesheets"""
        import_url = match.group(2).strip()
        if import_url.startswith(('http:', 'https:')):
            full_url = import_url
        else:
            full_url = urljoin(css_url, import_url)

        local_path = self.download_resource(full_url, referer=css_url)
        if local_path:
            return f'@import url("{local_path}");'
        return match.group(0)

    def process_css_url(self, match, css_url):
        """Process CSS url() references"""
        url = match.group(1).strip('\'"')
        if url.startswith(('data:', 'http:', 'https:')):
            return match.group(0)

        full_url = urljoin(css_url, url)
        local_path = self.download_resource(full_url, referer=css_url)
        return f'url("{local_path}")' if local_path else match.group(0)

    def process_js(self, content, js_url):
        """Process JavaScript files for dynamic resource loading"""
        # Handle dynamic imports
        content = re.sub(
            r'import\s*\(?[\'"](.*?)[\'"]\)?',
            lambda m: self.process_js_import(m, js_url),
            content
        )

        # Handle fetch/XHR calls (basic pattern matching)
        content = re.sub(
            r'fetch\([\'"]((?!http).*?)[\'"]\)',
            lambda m: self.process_js_fetch(m, js_url),
            content
        )

        return content

    def process_js_import(self, match, js_url):
        """Process JavaScript dynamic imports"""
        import_url = match.group(1).strip()
        full_url = urljoin(js_url, import_url)
        local_path = self.download_resource(full_url, referer=js_url)
        return f'import("{local_path}")' if local_path else match.group(0)

    def process_js_fetch(self, match, js_url):
        """Process JavaScript fetch calls to relative URLs"""
        fetch_url = match.group(1).strip()
        full_url = urljoin(js_url, fetch_url)
        local_path = self.download_resource(full_url, referer=js_url)
        return f'fetch("{local_path}")' if local_path else match.group(0)

    def process_html(self, content, page_url):
        """Process HTML content with support for modern web features"""
        soup = BeautifulSoup(content, 'html.parser')

        # Process standard resource tags
        resource_tags = [
            ('link', 'href', lambda e: e.get('rel', [''])[0] not in ['alternate', 'canonical']),
            ('script', 'src', None),
            ('img', 'src', None),
            ('img', 'srcset', None),
            ('source', 'src', None),
            ('source', 'srcset', None),
            ('audio', 'src', None),
            ('video', 'src', None),
            ('video', 'poster', None),
            ('iframe', 'src', None),
            ('embed', 'src', None),
            ('object', 'data', None),
            ('meta', 'content', lambda e: 'property' in e.attrs and e['property'] in ['og:image', 'og:video']),
            ('form', 'action', None),
            ('button', 'formaction', None),
            ('input', 'formaction', None),
            ('a', 'ping', None),
            ('area', 'href', None),
        ]

        for tag, attr, condition in resource_tags:
            for element in soup.find_all(tag, **{attr: True}):
                if condition and not condition(element):
                    continue

                urls = []
                if attr == 'srcset':
                    # Handle srcset with multiple URLs
                    srcset = element[attr]
                    parts = [p.strip() for p in srcset.split(',') if p.strip()]
                    for part in parts:
                        if ' ' in part:
                            url_part, descriptor = part.split(' ', 1)
                        else:
                            url_part, descriptor = part, None

                        if not url_part.startswith(('data:', 'javascript:')):
                            full_url = urljoin(page_url, url_part)
                            local_path = self.download_resource(full_url, referer=page_url)
                            if local_path:
                                new_part = local_path + (f' {descriptor}' if descriptor else '')
                                urls.append(new_part)
                            else:
                                urls.append(part)
                        else:
                            urls.append(part)
                    element[attr] = ', '.join(urls)
                else:
                    url = element.get(attr)
                    if url and not url.startswith(('data:', 'javascript:')):
                        full_url = urljoin(page_url, url)
                        local_path = self.download_resource(full_url, referer=page_url)
                        if local_path:
                            element[attr] = local_path

        # Process inline styles
        for element in soup.find_all(style=True):
            element['style'] = re.sub(
                r'url\([\'"]?(.*?)[\'"]?\)',
                lambda m: f"url('{self.download_resource(urljoin(page_url, m.group(1))), page_url}')"
                          if not m.group(1).startswith('data:')
                          else m.group(0),
                element['style']
            )

        # Process inline scripts (basic)
        for script in soup.find_all('script', string=True):
            if script.string:
                script.string = self.process_js(script.string, page_url)

        # Process shadow DOM content
        for template in soup.find_all('template'):
            shadow_content = self.process_html(str(template), page_url)
            template.clear()
            template.append(BeautifulSoup(shadow_content, 'html.parser'))

        # Process web components
        for element in soup.find_all(attrs={"shadowroot": True}):
            shadow_content = self.process_html(str(element), page_url)
            element.clear()
            element.append(BeautifulSoup(shadow_content, 'html.parser'))

        # Process links to other pages
        if self.depth > 0:
            for a_tag in soup.find_all('a', href=True):
                href = a_tag['href']
                if href and not href.startswith(('#', 'javascript:', 'mailto:')):
                    full_url = urljoin(page_url, href)
                    if urlparse(full_url).netloc == self.base_domain and full_url not in self.visited_urls:
                        self.download_page(full_url, depth=self.depth-1)

        return str(soup)

    def capture_with_playwright(self, url):
        """Use Playwright to render page and capture all network requests"""
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            context = browser.new_context(
                user_agent=self.user_agent,
                java_script_enabled=True,
                ignore_https_errors=True
            )

            page = context.new_page()
            page_resources = set()

            def log_request(request):
                page_resources.add(request.url)

            page.on("request", log_request)

            try:
                page.goto(url, wait_until="networkidle", timeout=60000)
                # Scroll to trigger lazy-loaded content
                page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                time.sleep(1)
                content = page.content()
                # Capture cookies for session
                self.cookies = {c['name']: c['value'] for c in context.cookies()}
                self.session.cookies.update(self.cookies)
            finally:
                context.close()
                browser.close()

            return content, page_resources

    def download_page(self, url, depth=1):
        """Download a page with intelligent resource handling"""
        if url in self.visited_urls:
            return
        self.visited_urls.add(url)

        print(f"Processing: {url} (depth: {depth})")

        try:
            # Use Playwright to render page and capture resources
            rendered_html, all_resources = self.capture_with_playwright(url)

            # Download all resources captured by Playwright
            for resource_url in all_resources:
                if resource_url not in self.resource_map:
                    self.download_resource(resource_url, referer=url)

            # Process HTML after rendering
            processed_html = self.process_html(rendered_html, url)

            # Save main HTML
            local_relative_path = self.get_local_path(url, 'text/html')
            local_path = self.output_dir / local_relative_path
            local_path.parent.mkdir(parents=True, exist_ok=True)

            with open(local_path, 'w', encoding='utf-8') as f:
                f.write(processed_html)

            print(f"Page saved: {local_relative_path}")
            return local_relative_path

        except Exception as e:
            print(f"Failed to process page {url}: {str(e)}")
            return None

    def start(self):
        """Start the download process"""
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.download_page(self.base_url, depth=self.depth)

        # Save resource map for debugging
        with open(self.output_dir / 'resource_map.json', 'w') as f:
            json.dump(self.resource_map, f, indent=2)

        print(f"\nWebsite successfully downloaded to: {self.output_dir}")
        print(f"Total resources downloaded: {len(self.resource_map)}")
        print(f"Open {self.output_dir / self.get_local_path(self.base_url, 'text/html')} to view the site")


# Colab-compatible execution
if __name__ == "__main__":
    # Check if running in Colab
    if 'google.colab' in sys.modules:
        print("Running in Google Colab")
        base_url = input("Enter website URL to download: ").strip()
        output_dir = input("Enter output directory [downloaded_website]: ").strip() or "downloaded_website"
        depth = int(input("Enter crawl depth [1]: ").strip() or 1)
    else:
        # Command-line execution
        parser = argparse.ArgumentParser(description='Advanced Website Downloader')
        parser.add_argument('url', help='URL of the website to download')
        parser.add_argument('-o', '--output', default='downloaded_website',
                           help='Output directory (default: downloaded_website)')
        parser.add_argument('-d', '--depth', type=int, default=1,
                           help='Crawl depth (default: 1)')
        parser.add_argument('--delay', type=float, default=0.5,
                           help='Delay between requests in seconds (default: 0.5)')
        args = parser.parse_args()
        base_url = args.url
        output_dir = args.output
        depth = args.depth

    downloader = AdvancedWebsiteDownloader(
        base_url=base_url,
        output_dir=output_dir,
        depth=depth,
        delay=0.5
    )
    downloader.start()

Running in Google Colab
Enter website URL to download: https://theolivebranchpk.com/
Enter output directory [downloaded_website]: /content/drive/MyDrive/PythonApps/WebsitesDownloader
Enter crawl depth [1]: 2
Processing: https://theolivebranchpk.com/ (depth: 2)
Failed to process page https://theolivebranchpk.com/: It looks like you are using Playwright Sync API inside the asyncio loop.
Please use the Async API instead.

Website successfully downloaded to: /content/drive/MyDrive/PythonApps/WebsitesDownloader
Total resources downloaded: 0
Open /content/drive/MyDrive/PythonApps/WebsitesDownloader/index.html to view the site
