# In [26]:
import re
import json
import os
from datetime import datetime, timezone
from telethon import TelegramClient, events
from telethon.errors import PhoneNumberInvalidError, SessionPasswordNeededError
from telethon.tl.functions.messages import GetHistoryRequest
from telethon.tl.types import PeerChannel
import asyncio

# --- Telegram API credentials ---------------------------------------------
# Placeholder values: replace them with your own api_id / api_hash and the
# phone number of the account to log in with.
api_id = 123456
api_hash = "YourAPIHash"
phone = "+910000000000"

# --- Scraper settings -----------------------------------------------------
channel_username = "toronionlinks"  # public channel whose messages are scanned
output_file = "onion_links.json"  # extracted link records are appended here
last_msg_file = "last_message_id.txt"  # holds the last processed message ID
# http:// or https:// followed by non-whitespace characters up to ".onion"
regex_pattern = r"http[s]?://[^\s]+\.onion"

# Load last processed message ID
def load_last_message_id(path=None):
    """Return the ID of the last processed message, or 0 if none is stored.

    Args:
        path: File to read the ID from. When omitted, the module-level
            ``last_msg_file`` is used.

    Returns:
        The stored ID as an int. 0 is returned when the file does not
        exist, is empty, or does not contain a valid integer, so a damaged
        checkpoint behaves like a first run instead of crashing.
    """
    if path is None:
        path = last_msg_file
    # EAFP: open directly rather than checking for existence first, which
    # avoids the window between the check and the open.
    try:
        with open(path, 'r') as f:
            raw = f.read().strip()
    except FileNotFoundError:
        return 0
    if not raw:
        return 0
    try:
        return int(raw)
    except ValueError:
        print(f"Ignoring invalid message ID in {path}: {raw!r}")
        return 0

# Save last processed message ID
def save_last_message_id(msg_id):
    """Overwrite the checkpoint file with the text form of ``msg_id``.

    The file named by ``last_msg_file`` is truncated and rewritten, so it
    only ever holds the most recently saved ID.
    """
    with open(last_msg_file, 'w') as checkpoint:
        # print() converts msg_id with str(); end='' keeps the file free of
        # a trailing newline.
        print(msg_id, end='', file=checkpoint)

# Extract .onion links from text using regex
def extract_onion_links(text, pattern=None):
    """Return every .onion URL found in ``text``, in order of appearance.

    Args:
        text: Message text to scan. ``None`` or an empty string yields an
            empty list instead of raising ``TypeError``.
        pattern: Regular expression to search with. When omitted, the
            module-level ``regex_pattern`` is used.

    Returns:
        A list of matched URL strings; duplicates are kept.
    """
    if not text:
        return []
    if pattern is None:
        pattern = regex_pattern
    return re.findall(pattern, text)

# Format the message into JSON
def format_json(onion_url):
    """Build the record stored for one discovered .onion URL.

    Returns a dict with the keys ``source``, ``url``, ``discovered_at``
    (current UTC time in ISO 8601 form), ``context`` (names the channel
    given by ``channel_username``) and ``status`` (always ``"pending"``).
    """
    timestamp = datetime.now(timezone.utc).isoformat()
    record = {"source": "telegram", "url": onion_url}
    record["discovered_at"] = timestamp
    record["context"] = f"Found in Telegram channel @{channel_username}"
    record["status"] = "pending"
    return record

# Append JSON objects to output file
def write_to_json_file(data_list):
    """Append each entry of ``data_list`` to ``output_file`` as one JSON line.

    The file is opened in append mode with UTF-8 encoding, so earlier
    results are preserved and every entry ends with a newline.
    """
    with open(output_file, 'a', encoding='utf-8') as sink:
        sink.writelines(json.dumps(record) + '\n' for record in data_list)

# Main logic
async def main():
    """Fetch new channel messages, extract .onion links and persist them.

    Steps:
      1. Log in with the module-level ``api_id`` / ``api_hash`` / ``phone``.
      2. Resolve ``channel_username`` to a channel entity.
      3. Read up to 100 of the newest messages, stopping at the first one
         whose ID is not greater than the stored checkpoint.
      4. Extract links from the collected messages in chronological order,
         append them to the output file and store the new checkpoint.

    The client is always disconnected on the way out, including the
    early-return paths and when an exception propagates.
    """
    client = TelegramClient('session_name', api_id, api_hash)

    try:
        try:
            await client.start(phone=phone)
        except PhoneNumberInvalidError:
            print("Invalid phone number.")
            return
        except SessionPasswordNeededError:
            password = input("Two-step verification is enabled. Enter your password: ")
            await client.sign_in(password=password)

        try:
            channel = await client.get_entity(channel_username)
        except Exception as e:
            print(f"Failed to get channel: {e}")
            return

        last_msg_id = load_last_message_id()
        messages_to_process = []
        new_last_msg_id = last_msg_id

        # Fetch messages (newest first); stop once already-processed IDs
        # are reached.
        async for message in client.iter_messages(channel, limit=100):
            if message.id <= last_msg_id:
                break
            # Advance the checkpoint for every message seen, not only those
            # with text, so text-less messages are not fetched again on the
            # next run.
            new_last_msg_id = max(new_last_msg_id, message.id)
            if message.message:
                messages_to_process.append(message)

        # Process messages in chronological order
        messages_to_process.reverse()

        all_extracted = []
        for msg in messages_to_process:
            all_extracted.extend(
                format_json(link) for link in extract_onion_links(msg.message)
            )

        # Save results
        if all_extracted:
            write_to_json_file(all_extracted)
            print(f"✅ Extracted {len(all_extracted)} .onion links.")
        else:
            print("No .onion links found in new messages.")

        # Stored only after the links were written, so a failure while
        # writing leaves the old checkpoint in place and the messages are
        # retried on the next run.
        save_last_message_id(new_last_msg_id)
    finally:
        await client.disconnect()

# Run the main function
# Script entry point: asyncio.run() drives the async main() to completion.
if __name__ == '__main__':
    try:
        asyncio.run(main())
    except Exception as e:
        # Top-level boundary: any error escaping main() is reported as a
        # one-line message instead of a traceback.
        print(f"Unexpected error: {e}")


# Output: Unexpected error: The api_id/api_hash combination is invalid (caused by SendCodeRequest)
