In [1]:
import pandas as pd
import sys
import os

# Add the scripts directory to the Python path so the helper modules in
# ../scripts (resolved against the current working directory) can be imported.
# The membership check keeps this cell idempotent: re-running it does not pile
# up duplicate entries in sys.path.
scripts_dir = os.path.abspath('../scripts')
if scripts_dir not in sys.path:
    sys.path.append(scripts_dir)

In [2]:
# Import the scraping script
from telegram_scraper import scrape_telegram_channel, save_data_to_csv
import asyncio

# Telegram channel usernames to scrape, in the order they will be processed.
channels = [
    "DoctorsET",
    "lobelia4cosmetics",
    "yetenaweg",
    "EAHCI",
]

# Define a function to scrape multiple channels and combine the results
async def scrape_multiple_channels(channels, limit=50):
    """
    Scrapes multiple Telegram channels and combines the results into a single DataFrame.

    Channels are scraped one after another (each scrape is awaited before the
    next one starts), and every row is tagged with the channel it came from.

    Parameters:
    - channels: Iterable of Telegram channel names.
    - limit: Number of messages to scrape per channel (forwarded to
      scrape_telegram_channel).

    Returns:
    - Combined DataFrame of all scraped data with a 'channel' column added and
      a fresh 0..n-1 index. When no channels are given, an empty DataFrame
      containing only the 'channel' column is returned.
    """
    all_dataframes = []
    for channel in channels:
        df = await scrape_telegram_channel(channel, limit=limit)
        # assign() returns a new frame, so the object handed back by the
        # scraper is not modified in place.
        all_dataframes.append(df.assign(channel=channel))

    # pd.concat raises ValueError on an empty list, so "no channels" is
    # handled explicitly instead of crashing.
    if not all_dataframes:
        return pd.DataFrame(columns=['channel'])

    # Combine all DataFrames into one
    return pd.concat(all_dataframes, ignore_index=True)

# Call the asynchronous function in the notebook using asyncio
# NOTE(review): asyncio.run() raises RuntimeError when an event loop is already
# running in the current thread, which is the usual situation in recent Jupyter
# kernels. If that happens here, replace the call with
# `scraped_data = await scrape_multiple_channels(channels)`.
scraped_data = asyncio.run(scrape_multiple_channels(channels))

# Save to CSV
save_data_to_csv(scraped_data, filename="medical_businesses_scraped_data.csv")

# Preview the scraped data. This has to be the last expression in the cell:
# Jupyter only renders the value of a cell's final expression, so a bare
# `.head()` placed earlier is evaluated and silently discarded.
scraped_data.head()


2024-10-10 15:49:46,532 - INFO - Connecting to 149.154.167.51:443/TcpFull...
2024-10-10 15:49:46,786 - INFO - Connection to 149.154.167.51:443/TcpFull complete!
2024-10-10 15:49:56,038 - INFO - Phone migrated to 4
2024-10-10 15:49:56,315 - INFO - Reconnecting to new data center 4
2024-10-10 15:49:56,611 - INFO - Disconnecting from 149.154.167.51:443/TcpFull...
2024-10-10 15:49:56,616 - INFO - Disconnection from 149.154.167.51:443/TcpFull complete!
2024-10-10 15:49:56,618 - INFO - Connecting to 149.154.167.92:443/TcpFull...
2024-10-10 15:49:58,923 - INFO - Connection to 149.154.167.92:443/TcpFull complete!
2024-10-10 15:50:31,631 - INFO - Client connected to Telegram.
2024-10-10 15:50:31,632 - INFO - Scraping data from channel: DoctorsET


Signed in successfully as Naima T; remember to not break the ToS or you will risk an account ban!


2024-10-10 15:50:32,683 - INFO - Scraped 50 messages from DoctorsET
2024-10-10 15:50:32,686 - INFO - Disconnecting from 149.154.167.92:443/TcpFull...
2024-10-10 15:50:32,689 - INFO - Disconnection from 149.154.167.92:443/TcpFull complete!
2024-10-10 15:50:32,725 - INFO - Connecting to 149.154.167.92:443/TcpFull...
2024-10-10 15:50:32,940 - INFO - Connection to 149.154.167.92:443/TcpFull complete!
2024-10-10 15:50:33,481 - INFO - Client connected to Telegram.
2024-10-10 15:50:33,482 - INFO - Scraping data from channel: lobelia4cosmetics
2024-10-10 15:50:34,323 - INFO - Scraped 50 messages from lobelia4cosmetics
2024-10-10 15:50:34,324 - INFO - Disconnecting from 149.154.167.92:443/TcpFull...
2024-10-10 15:50:34,325 - INFO - Disconnection from 149.154.167.92:443/TcpFull complete!
2024-10-10 15:50:34,344 - INFO - Connecting to 149.154.167.92:443/TcpFull...
2024-10-10 15:50:34,622 - INFO - Connection to 149.154.167.92:443/TcpFull complete!
2024-10-10 15:50:35,159 - INFO - Client connected 