In [1]:
import sys
sys.path.append("../src") # add the script and src directory to the path
sys.path.append("../script") # add the scraping directory to the path
from scraping.telegram_scraper import TelegramScraper
from utils.config import TELEGRAM_CONFIG

import nest_asyncio

# Required for Jupyter
nest_asyncio.apply()

# Initialize scraper
scraper = TelegramScraper(
    session_name=TELEGRAM_CONFIG['session_name'],
    api_id=int(TELEGRAM_CONFIG['api_id']),
    api_hash=TELEGRAM_CONFIG['api_hash']
)

# Channels to scrape
channels = [
    '@Shageronlinestore',
    '@ZemenExpress',
    '@nevacomputer',
    '@meneshayeofficial',
    '@ethio_brand_collection',
    '@Leyueqa',
    '@helloomarketethiopia'
]

# Run the scraper
await scraper.run(channels)

print("Scraping completed! Check telegram_data.csv")

Signed in successfully as መስፍን ሙሉጌታ; remember to not break the ToS or you will risk an account ban!
Scraping Sheger online-store (@@Shageronlinestore)...
Scraping Zemen Express® (@@ZemenExpress)...
Scraping NEVA COMPUTER® (@@nevacomputer)...
Scraping መነሻዬ (@@meneshayeofficial)...
Scraping EthioBrand® (@@ethio_brand_collection)...
Scraping ልዩ እቃ (@@Leyueqa)...
Scraping HellooMarket (@@helloomarketethiopia)...
Scraping completed! Check telegram_data.csv


In [3]:
# Read the CSV written by the scraping step into a DataFrame.
import pandas as pd

data = pd.read_csv('telegram_data.csv')

# Preview the top of the table (5 rows, which is the head() default).
data.head(5)

Unnamed: 0,Channel,Username,Message ID,Text,Date,Media Path
0,Sheger online-store,@Shageronlinestore,7394,,2025-06-20 11:47:53+00:00,../data/telegram_media\@Shageronlinestore_7394...
1,Sheger online-store,@Shageronlinestore,7393,💥 1L Water Bottle\n\n 💯High Quality\n\n⚡...,2025-06-20 11:47:53+00:00,../data/telegram_media\@Shageronlinestore_7393...
2,Sheger online-store,@Shageronlinestore,7392,,2025-06-20 09:03:23+00:00,../data/telegram_media\@Shageronlinestore_7392...
3,Sheger online-store,@Shageronlinestore,7391,💥 Sonifer Steam Iron \n\n የልብስ መቶከሻ\n\n💯 ...,2025-06-20 09:03:23+00:00,../data/telegram_media\@Shageronlinestore_7391...
4,Sheger online-store,@Shageronlinestore,7390,💥Sayona multifunctional juicer and extractor\n...,2025-06-20 06:48:11+00:00,../data/telegram_media\@Shageronlinestore_7390...


In [5]:
# Count missing entries per column to see which fields need cleaning.
nan_counts = data.isna().sum()

print("Number of NaN values in each column:")
print(nan_counts)

Number of NaN values in each column:
Channel          0
Username         0
Message ID       0
Text          2328
Date             0
Media Path     941
dtype: int64


In [None]:
# Keep only the rows whose Text value is present; rows with NaN in Text
# are discarded. The original index labels are preserved.
has_text = data['Text'].notna()
data = data.loc[has_text]

In [7]:
# Re-count missing entries per column after filtering, to confirm that
# the Text column no longer contains NaN.
print("Number of NaN values in each column:", data.isna().sum(), sep="\n")

Number of NaN values in each column:
Channel         0
Username        0
Message ID      0
Text            0
Date            0
Media Path    901
dtype: int64


In [None]:
import os

from preprocessing.clean_text import clean_text

# Destination of the cleaned dataset.
cleaned_csv_path = '../data/processed/telegram_data_cleaned.csv'

# Add the cleaned text as a new column. assign() returns a new DataFrame
# instead of writing into `data` in place; `data` was produced by a row
# filter, and in-place column assignment on such a frame can trigger
# pandas' SettingWithCopyWarning.
# NOTE(review): clean_text is applied to every Text value, so this cell
# assumes rows with missing Text were already removed -- confirm.
data = data.assign(**{'Cleaned Text': data['Text'].apply(clean_text)})

# Save cleaned data. to_csv does not create missing parent directories,
# so make sure the output folder exists first.
os.makedirs(os.path.dirname(cleaned_csv_path), exist_ok=True)
data.to_csv(cleaned_csv_path, index=False)


In [11]:
# Display the DataFrame to spot-check the result; the notebook shows a
# truncated view (first and last rows) including the 'Cleaned Text' column.
data

Unnamed: 0,Channel,Username,Message ID,Text,Date,Media Path,Cleaned Text
1,Sheger online-store,@Shageronlinestore,7393,💥 1L Water Bottle\n\n 💯High Quality\n\n⚡...,2025-06-20 11:47:53+00:00,../data/telegram_media\@Shageronlinestore_7393...,1L Water Bottle High Quality 1L water time s...
3,Sheger online-store,@Shageronlinestore,7391,💥 Sonifer Steam Iron \n\n የልብስ መቶከሻ\n\n💯 ...,2025-06-20 09:03:23+00:00,../data/telegram_media\@Shageronlinestore_7391...,Sonifer Steam Iron የልብስ መቶከሻ High Quality ...
4,Sheger online-store,@Shageronlinestore,7390,💥Sayona multifunctional juicer and extractor\n...,2025-06-20 06:48:11+00:00,../data/telegram_media\@Shageronlinestore_7390...,Sayona multifunctional juicer and extractor Be...
10,Sheger online-store,@Shageronlinestore,7384,📣 2in1 long handled bath brush\n\n💥ለአያያዝ ምቹ \n...,2025-06-19 12:31:30+00:00,../data/telegram_media\@Shageronlinestore_7384...,2in1 long handled bath brush ለአያያዝ ምቹ በቀላሉ የማ...
11,Sheger online-store,@Shageronlinestore,7383,💥Miralux Hot plate\n ባለሁለት ምድጃ ስቶቭ\n\n 💯o...,2025-06-19 06:31:31+00:00,../data/telegram_media\@Shageronlinestore_7383...,Miralux Hot plate ባለሁለት ምድጃ ስቶቭ orginal 2000 ...
...,...,...,...,...,...,...,...
6923,HellooMarket,@helloomarketethiopia,3528,**መልካም ገና** ለቤቶ አልያም ለቢሮዎ ቱሊፕ የወሃ ማጣሪያ ለማዘዝ 84...,2024-01-04 09:44:08+00:00,../data/telegram_media\@helloomarketethiopia_3...,መልካም ገና ለቤቶ አልያም ለቢሮዎ ቱሊፕ የወሃ ማጣሪያ ለማዘዝ 8420 ይ...
6924,HellooMarket,@helloomarketethiopia,3527,**መልካም ገና** ባማረ ዲዛይን የተሰሩ ውብ ሴቶች አላባሽ በተለያየ የቀ...,2024-01-03 18:00:20+00:00,../data/telegram_media\@helloomarketethiopia_3...,መልካም ገና ባማረ ዲዛይን የተሰሩ ውብ ሴቶች አላባሽ በተለያየ የቀለም አ...
6925,HellooMarket,@helloomarketethiopia,3526,በተዋበ ዲዛይን ለአያያዝ ምቹ ከቆዳ የተሰራ የሴቶች ቦርሳ ለማዘዝ 8420...,2024-01-03 15:22:51+00:00,../data/telegram_media\@helloomarketethiopia_3...,በተዋበ ዲዛይን ለአያያዝ ምቹ ከቆዳ የተሰራ የሴቶች ቦርሳ ለማዘዝ 8420...
6926,HellooMarket,@helloomarketethiopia,3525,**መልካም ገና** ፈንትዉላለ ጥራት ቱምሃ ፈሳሽ የእጅ ሳሙና ለማዘዝ 84...,2024-01-03 09:27:52+00:00,../data/telegram_media\@helloomarketethiopia_3...,መልካም ገና ፈንትዉላለ ጥራት ቱምሃ ፈሳሽ የእጅ ሳሙና ለማዘዝ 8420 ይ...
