<a href="https://colab.research.google.com/github/LoggingNewMemory/MirrorBot/blob/main/Bot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Google Colab MirrorBot** <br />
By: Kanagawa Yamada

In [None]:
# Mirror Bot for Google Colab with Download Speed Indicator and Preserved Filenames
# Install required packages
!pip install requests beautifulsoup4 pyrogram tgcrypto

import asyncio
import os
import re
import shutil
import tarfile
import time
import zipfile
import zlib
from pathlib import Path
from urllib.parse import parse_qs, unquote, urlparse

import requests
from google.colab import drive, files
from IPython.display import display, HTML

class MirrorBot:
    """Download files into the Colab runtime and copy them to Google Drive.

    The bot keeps downloads in ``download_dir`` and copies finished files (or
    extracted archive folders) into ``drive_dir``. All public methods report
    progress and errors with ``print`` and signal failure through their return
    value instead of raising, so a notebook cell never aborts half-way.
    """

    # Where Google Drive is mounted inside the Colab runtime.
    DRIVE_MOUNT_POINT = '/content/drive'
    # (connect, read) timeouts in seconds so a dead server cannot hang the cell.
    REQUEST_TIMEOUT = (15, 120)
    # Bytes requested from the network per read while streaming a download.
    CHUNK_SIZE = 64 * 1024
    # Minimum number of seconds between two progress-line refreshes.
    PROGRESS_INTERVAL = 0.5
    # Number of recent speed measurements averaged for the displayed speed.
    SPEED_SAMPLES = 10
    # Longest filename (in characters) produced by sanitize_filename().
    MAX_FILENAME_LENGTH = 200
    # Name used when nothing usable is left after sanitizing.
    DEFAULT_FILENAME = 'download'
    # Archive suffixes understood by extract_archive(), matched case-insensitively.
    ZIP_SUFFIXES = ('.zip', '.jar')
    TAR_SUFFIXES = ('.tar.gz', '.tar.bz2', '.tar.xz', '.tgz', '.tbz2', '.txz', '.tar')
    # Characters replaced by "_" in filenames: reserved punctuation and the
    # ASCII control characters (a NUL byte would make open() raise).
    _UNSAFE_TABLE = str.maketrans(
        dict.fromkeys('<>:"/\\|?*' + ''.join(map(chr, range(32))), '_')
    )

    def __init__(self):
        """Set the working directories, mount Drive and create the folders."""
        self.download_dir = "/content/downloads"
        self.drive_dir = "/content/drive/MyDrive/MirrorBot"
        self.drive_mounted = False
        self.mount_drive()
        self.setup_directories()

    def mount_drive(self):
        """Mount Google Drive and record the outcome in ``self.drive_mounted``.

        Returns:
            True when the mount call succeeded, False otherwise.
        """
        try:
            drive.mount(self.DRIVE_MOUNT_POINT)
            self.drive_mounted = True
            print("✅ Google Drive mounted successfully")
        except Exception as e:
            # Best effort: the bot stays usable for local-only downloads.
            self.drive_mounted = False
            print(f"❌ Error mounting drive: {e}")
        return self.drive_mounted

    def setup_directories(self):
        """Create the download directory and, if Drive is mounted, the Drive one.

        The Drive directory is skipped after a failed mount; creating it then
        would only produce a local folder that looks like Drive but is lost
        together with the runtime.
        """
        os.makedirs(self.download_dir, exist_ok=True)
        # Instances built without __init__ have no flag; treat them as mounted.
        if getattr(self, 'drive_mounted', True):
            os.makedirs(self.drive_dir, exist_ok=True)
            print(f"📁 Directories created: {self.download_dir}, {self.drive_dir}")
        else:
            print(f"📁 Directories created: {self.download_dir}")
            print("⚠️ Google Drive is not mounted; files will only be kept locally")

    def get_filename_from_response(self, response):
        """Extract the filename from a response's Content-Disposition header.

        The extended ``filename*=charset'lang'value`` parameter is preferred
        over the plain ``filename=`` parameter and is percent-decoded with the
        declared charset. Quoted plain values may contain ``;``.

        Args:
            response: Object exposing a ``headers`` mapping.

        Returns:
            The filename, or None when the header is missing or has no name.
        """
        content_disposition = response.headers.get('Content-Disposition', '')
        if not content_disposition:
            return None

        extended = re.search(
            r'filename\*\s*=\s*([^;]+)', content_disposition, re.IGNORECASE
        )
        if extended:
            value = extended.group(1).strip().strip('"')
            parts = value.split("'", 2)
            if len(parts) == 3:
                charset, _language, encoded = parts
                try:
                    value = unquote(
                        encoded, encoding=charset or 'utf-8', errors='replace'
                    )
                except LookupError:
                    # Unknown charset label: fall back to UTF-8 decoding.
                    value = unquote(encoded)
            if value:
                return value

        # "filename" directly followed by "*" cannot match here, so this only
        # picks up the plain parameter.
        plain = re.search(
            r'filename\s*=\s*("[^"]*"|[^;]+)', content_disposition, re.IGNORECASE
        )
        if plain:
            value = plain.group(1).strip().strip('\'"').strip()
            if value:
                return value
        return None

    def get_filename_from_url(self, url, response=None):
        """Choose a filename for *url*.

        Sources are tried in this order: the response's Content-Disposition
        header, the last path component, a ``filename``/``file``/``name``/
        ``title`` query parameter, the last non-empty path segment, and
        finally a generated ``<domain>_<number>`` name.

        Args:
            url: The download URL.
            response: Optional object exposing a ``headers`` mapping.

        Returns:
            A non-empty filename (not yet sanitized).
        """
        # Compare with None: a requests.Response is falsy for non-OK statuses.
        if response is not None:
            header_filename = self.get_filename_from_response(response)
            if header_filename:
                return header_filename

        parsed = urlparse(url)
        filename = os.path.basename(unquote(parsed.path))
        if filename and not filename.startswith('.'):
            # Percent-decoding can reveal "?"/"#"; keep only what precedes them.
            filename = filename.split('?')[0].split('#')[0]
            if filename:
                return filename

        # Download links often carry the name in the query string.
        query_params = parse_qs(parsed.query)
        for param in ('filename', 'file', 'name', 'title'):
            values = query_params.get(param)
            if values and values[0] and not values[0].startswith('.'):
                return values[0]

        # Paths ending in "/" have an empty basename; use the last segment.
        path_parts = [part for part in parsed.path.split('/') if part]
        if path_parts:
            last_part = unquote(path_parts[-1])
            if last_part and not last_part.startswith('.'):
                candidate = last_part.split('?')[0].split('#')[0]
                if candidate:
                    return candidate

        # CRC32 is stable across interpreter runs, unlike the built-in hash()
        # of a str, so the same URL always yields the same generated name.
        domain = parsed.netloc or 'download'
        if domain.startswith('www.'):
            domain = domain[len('www.'):]
        return f"{domain}_{zlib.crc32(url.encode('utf-8')) % 10000}"

    def sanitize_filename(self, filename):
        """Return *filename* made safe to use as a single path component.

        Reserved punctuation and control characters become ``_``, surrounding
        whitespace is removed, names made only of dots/spaces (``""``, ``.``,
        ``..``) are replaced by ``DEFAULT_FILENAME``, and the result is capped
        at ``MAX_FILENAME_LENGTH`` characters while keeping the extension.
        """
        filename = filename.translate(self._UNSAFE_TABLE).strip()
        if not filename.strip('. '):
            return self.DEFAULT_FILENAME

        limit = self.MAX_FILENAME_LENGTH
        if len(filename) > limit:
            name, ext = os.path.splitext(filename)
            if len(ext) >= limit:
                # Not a real extension; a plain cut is the only way to fit.
                filename = filename[:limit]
            else:
                filename = name[:limit - len(ext)] + ext
        return filename

    def format_speed(self, bytes_per_second):
        """Format a transfer rate, e.g. ``2.0 MB/s``."""
        return f"{self.format_size(bytes_per_second)}/s"

    def estimate_time_remaining(self, downloaded, total_size, speed):
        """Estimate the remaining download time as a short string.

        Args:
            downloaded: Bytes received so far.
            total_size: Expected total bytes (0 when unknown).
            speed: Current speed in bytes per second.

        Returns:
            ``"∞"`` when speed or total size is not positive, otherwise
            ``"Ns"``, ``"Nm Ns"`` or ``"Nh Nm"``.
        """
        if speed <= 0 or total_size <= 0:
            return "∞"

        # Decoded bytes can exceed Content-Length for compressed responses;
        # never report a negative time.
        remaining_seconds = max(total_size - downloaded, 0) / speed
        minutes, seconds = divmod(int(remaining_seconds), 60)
        if minutes == 0:
            return f"{seconds}s"
        hours, minutes = divmod(minutes, 60)
        if hours == 0:
            return f"{minutes}m {seconds}s"
        return f"{hours}h {minutes}m"

    @staticmethod
    def _content_length(response):
        """Return the Content-Length header as a non-negative int (0 if unusable)."""
        try:
            return max(int(response.headers.get('content-length', 0)), 0)
        except (TypeError, ValueError):
            return 0

    def _unique_path(self, filename):
        """Return a path in the download directory that does not exist yet.

        ``_1``, ``_2``, ... is appended before the extension until the name
        is free.
        """
        filepath = os.path.join(self.download_dir, filename)
        base, ext = os.path.splitext(filename)
        counter = 1
        while os.path.exists(filepath):
            filepath = os.path.join(self.download_dir, f"{base}_{counter}{ext}")
            counter += 1
        return filepath

    def _print_progress(self, downloaded, total_size, avg_speed):
        """Rewrite the single-line progress indicator in place."""
        if total_size > 0:
            progress = min((downloaded / total_size) * 100, 100.0)
            eta = self.estimate_time_remaining(downloaded, total_size, avg_speed)
            print(f"\r📥 {progress:.1f}% | {self.format_speed(avg_speed)} | ETA: {eta} | {self.format_size(downloaded)}/{self.format_size(total_size)}",
                  end='', flush=True)
        else:
            print(f"\r📥 {self.format_speed(avg_speed)} | {self.format_size(downloaded)} downloaded",
                  end='', flush=True)

    def _stream_to_file(self, response, filepath, total_size):
        """Write the response body to *filepath*, printing progress.

        Returns:
            The number of bytes written.
        """
        downloaded = 0
        last_downloaded = 0
        last_time = time.monotonic()
        speed_samples = []

        with open(filepath, 'wb') as file:
            for chunk in response.iter_content(chunk_size=self.CHUNK_SIZE):
                if not chunk:
                    continue
                file.write(chunk)
                downloaded += len(chunk)

                now = time.monotonic()
                elapsed = now - last_time
                if elapsed < self.PROGRESS_INTERVAL:
                    continue

                # Average the most recent samples so the display does not jitter.
                speed_samples.append((downloaded - last_downloaded) / elapsed)
                del speed_samples[:-self.SPEED_SAMPLES]
                avg_speed = sum(speed_samples) / len(speed_samples)
                self._print_progress(downloaded, total_size, avg_speed)

                last_time = now
                last_downloaded = downloaded
        return downloaded

    def download_file(self, url, custom_name=None):
        """Download *url* into the download directory with a progress display.

        The filename comes from *custom_name* if given, otherwise from the
        response headers or the URL; it is sanitized and made unique.

        Args:
            url: Address to download.
            custom_name: Optional filename overriding automatic detection.

        Returns:
            Path of the downloaded file, or None on failure. A partially
            written file is removed when the download fails.
        """
        filepath = None
        try:
            print(f"🔄 Starting download: {url}")

            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }
            os.makedirs(self.download_dir, exist_ok=True)

            # The context manager releases the connection even on errors.
            with requests.get(url, headers=headers, stream=True,
                              timeout=self.REQUEST_TIMEOUT) as response:
                response.raise_for_status()

                # Priority: custom_name > response headers > URL
                filename = custom_name or self.get_filename_from_url(url, response)
                filepath = self._unique_path(self.sanitize_filename(filename))
                filename = os.path.basename(filepath)
                print(f"📁 Saving as: {filename}")

                total_size = self._content_length(response)
                start_time = time.monotonic()
                downloaded = self._stream_to_file(response, filepath, total_size)

            total_time = time.monotonic() - start_time
            avg_speed = downloaded / total_time if total_time > 0 else 0

            print(f"\n✅ Downloaded: {filename}")
            print(f"📊 Size: {self.format_size(downloaded)} | Time: {total_time:.1f}s | Avg Speed: {self.format_speed(avg_speed)}")
            return filepath

        except Exception as e:
            # Notebook boundary: report the failure instead of raising.
            print(f"\n❌ Download failed: {e}")
            # filepath was unused before this call, so whatever exists there
            # now is this call's incomplete output.
            if filepath and os.path.isfile(filepath):
                try:
                    os.remove(filepath)
                except OSError:
                    pass
            return None

    def format_size(self, bytes):
        """Format a byte count with one decimal, e.g. ``1.5 KB``.

        The parameter keeps its original name ``bytes`` for keyword callers
        even though it shadows the builtin inside this method.
        """
        size = float(bytes)
        for unit in ('B', 'KB', 'MB', 'GB'):
            if size < 1024:
                return f"{size:.1f} {unit}"
            size /= 1024
        return f"{size:.1f} TB"

    @staticmethod
    def _extract_tar_safely(tar_ref, extract_dir):
        """Extract an open tar archive without writing outside *extract_dir*."""
        if hasattr(tarfile, 'data_filter'):
            # Interpreters with extraction filters reject unsafe members.
            tar_ref.extractall(extract_dir, filter='data')
            return

        # Older interpreters: refuse archives whose member names escape the
        # target directory before extracting anything.
        root = os.path.realpath(extract_dir)
        for member in tar_ref.getmembers():
            target = os.path.realpath(os.path.join(root, member.name))
            if os.path.commonpath([root, target]) != root:
                raise ValueError(f"Unsafe path in archive: {member.name}")
        tar_ref.extractall(extract_dir)

    def extract_archive(self, filepath):
        """Extract a zip/jar or (compressed) tar archive next to itself.

        The target directory is the archive path without its archive suffix,
        so ``data.tar.gz`` extracts into ``data``. Nothing is created for
        files that are not a supported archive.

        Args:
            filepath: Path of the file to extract.

        Returns:
            The extraction directory on success; *filepath* unchanged when the
            format is unsupported or extraction fails.
        """
        lowered = filepath.lower()
        suffix = next(
            (s for s in self.ZIP_SUFFIXES + self.TAR_SUFFIXES if lowered.endswith(s)),
            None,
        )
        if suffix is None:
            print("⚠️ Not a supported archive format")
            return filepath

        extract_dir = filepath[:-len(suffix)]
        if not os.path.basename(extract_dir):
            # A file named just ".zip" would otherwise extract into its parent.
            extract_dir = filepath + "_extracted"

        try:
            print("📦 Extracting archive...")
            os.makedirs(extract_dir, exist_ok=True)

            if suffix in self.ZIP_SUFFIXES:
                with zipfile.ZipFile(filepath, 'r') as zip_ref:
                    zip_ref.extractall(extract_dir)
            else:
                with tarfile.open(filepath, 'r:*') as tar_ref:
                    self._extract_tar_safely(tar_ref, extract_dir)

            print(f"✅ Extracted to: {extract_dir}")
            return extract_dir

        except Exception as e:
            print(f"❌ Extraction failed: {e}")
            return filepath

    def move_to_drive(self, filepath):
        """Copy a file or directory into the Drive directory.

        Despite the name, the source is copied, not removed, so it remains
        visible to list_downloads() until clear_downloads() is called.

        Args:
            filepath: File or directory to copy.

        Returns:
            The destination path, or None when Drive is not mounted or the
            copy fails.
        """
        # Instances built without __init__ have no flag; treat them as mounted.
        if not getattr(self, 'drive_mounted', True):
            print("❌ Failed to move to Drive: Google Drive is not mounted")
            return None

        try:
            drive_path = os.path.join(self.drive_dir, os.path.basename(filepath))

            print("☁️ Moving to Google Drive...")
            os.makedirs(self.drive_dir, exist_ok=True)

            if os.path.isdir(filepath):
                shutil.copytree(filepath, drive_path, dirs_exist_ok=True)
            else:
                shutil.copy2(filepath, drive_path)

            print(f"✅ Moved to Drive: {drive_path}")
            return drive_path

        except Exception as e:
            print(f"❌ Failed to move to Drive: {e}")
            return None

    def list_downloads(self):
        """Print an indented tree of the download directory with size totals."""
        print("\n📋 Downloaded Files:")
        print("-" * 60)

        total_files = 0
        total_size = 0

        for root, dirnames, filenames in os.walk(self.download_dir):
            dirnames.sort()  # in-place sort makes os.walk descend in name order
            relative = os.path.relpath(root, self.download_dir)
            level = 0 if relative == os.curdir else relative.count(os.sep) + 1

            indent = ' ' * 2 * level
            if level == 0 and filenames:
                print(f"{indent}📁 {os.path.basename(root) or 'downloads'}/")
            elif level > 0:
                print(f"{indent}📁 {os.path.basename(root)}/")

            subindent = ' ' * 2 * (level + 1)
            for name in sorted(filenames):
                try:
                    size = os.path.getsize(os.path.join(root, name))
                except OSError:
                    # e.g. a broken symlink from an extracted archive
                    size = 0
                total_files += 1
                total_size += size
                print(f"{subindent}📄 {name} ({self.format_size(size)})")

        print("-" * 60)
        print(f"📊 Total: {total_files} files, {self.format_size(total_size)}")

    def clear_downloads(self):
        """Delete everything in the download directory and recreate it empty."""
        try:
            # A missing directory is not an error; it is simply recreated.
            if os.path.isdir(self.download_dir):
                shutil.rmtree(self.download_dir)
            os.makedirs(self.download_dir, exist_ok=True)
            print("🗑️ Downloads cleared")
        except OSError as e:
            print(f"❌ Error clearing downloads: {e}")

    def mirror(self, url, extract=False, move_to_drive=True, custom_name=None):
        """Download *url*, optionally extract it, and optionally copy it to Drive.

        Args:
            url: Address to download.
            extract: Extract the download when it is a supported archive.
            move_to_drive: Copy the result into the Drive directory.
            custom_name: Optional filename overriding automatic detection.

        Returns:
            The Drive path when the copy succeeded, the local path when the
            copy was skipped or failed, or None when the download failed.
        """
        print("🚀 Mirror Bot Starting...")
        print(f"📂 Download directory: {self.download_dir}")
        print(f"☁️ Drive directory: {self.drive_dir}")
        print("=" * 60)

        filepath = self.download_file(url, custom_name)
        if not filepath:
            return None

        # extract_archive() hands back the original path for non-archives.
        if extract:
            filepath = self.extract_archive(filepath)

        if move_to_drive:
            drive_path = self.move_to_drive(filepath)
            if drive_path:
                print("✨ Mirror completed! File saved to Drive")
                return drive_path

        print("✨ Mirror completed! File saved locally")
        return filepath

# Create the module-level MirrorBot instance that the helper functions below
# (mirror_file, list_files, clear_files) operate on. Constructing it runs
# MirrorBot.__init__, which calls mount_drive() and setup_directories().
bot = MirrorBot()

# Example usage functions
def mirror_file(url, extract=False, move_to_drive=True, custom_name=None):
    """Run the shared bot's mirror() for *url* and return its result.

    All arguments are forwarded unchanged to ``bot.mirror``.
    """
    result = bot.mirror(
        url,
        extract=extract,
        move_to_drive=move_to_drive,
        custom_name=custom_name,
    )
    return result

def list_files():
    """Print the shared bot's download listing."""
    show_listing = bot.list_downloads
    show_listing()

def clear_files():
    """Empty the shared bot's download directory."""
    wipe_downloads = bot.clear_downloads
    wipe_downloads()

# Display usage instructions: render a static HTML help panel in the notebook
# output listing the features and example calls for mirror_file(),
# list_files() and clear_files().
display(HTML("""
<div style="border: 2px solid #4CAF50; padding: 15px; border-radius: 10px; background-color: #f9f9f9;">
<h3>🤖 Enhanced Mirror Bot Ready! (With Preserved Filenames)</h3>
<h4>New Features:</h4>
<ul>
<li>📊 Real-time download speed</li>
<li>⏱️ ETA (Estimated Time of Arrival)</li>
<li>📈 Progress with detailed statistics</li>
<li>🎯 Improved file listing with total stats</li>
<li>✨ <strong>Preserves original filenames from downloads</strong></li>
<li>🔧 Smart filename detection from URLs and headers</li>
<li>🛡️ Handles duplicate filenames automatically</li>
</ul>
<h4>Usage Examples:</h4>
<code>
# Mirror a file (preserves original name)<br>
mirror_file("https://example.com/LOS-GSI.zip")<br>
# Output: LOS-GSI.zip<br><br>

# Mirror and extract archive<br>
mirror_file("https://example.com/archive.zip", extract=True)<br><br>

# Mirror with custom name (overrides detection)<br>
mirror_file("https://example.com/file.pdf", custom_name="my_document.pdf")<br><br>

# Mirror without moving to Drive<br>
mirror_file("https://example.com/file.txt", move_to_drive=False)<br><br>

# List downloaded files<br>
list_files()<br><br>

# Clear downloads<br>
clear_files()
</code>
</div>
"""))

# Plain-text confirmation printed after the HTML panel has been displayed.
print("🎉 Enhanced Mirror Bot initialized successfully!")
print("📝 Use the functions above to start mirroring files")
print("🚀 Now with enhanced filename preservation!")