### Task 2 - Data Modeling and Transformation (Transform)

In [1]:
import os
import asyncio
import psycopg2
from telethon import TelegramClient
from telethon.tl.types import MessageMediaPhoto
from dotenv import load_dotenv
import sys
sys.path.append(os.path.abspath("../Script"))
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Switch to the parent directory exactly once: remember where the kernel
# started so that re-running this cell does not climb another level.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, os.pardir))

In [3]:
from data_scraper import get_pgsql_connection

In [4]:
# Pull the Telegram settings out of the .env file (adjust the path if needed)
load_dotenv(dotenv_path="../.env")
api_id, api_hash, session_name = (
    os.environ.get(key) for key in ("tg_api_id", "tg_api_hash", "session_name")
)
# The PostgreSQL connection is not opened here; scrape_images_async obtains
# its own connection through get_pgsql_connection().

# Async image scraping function
async def scrape_images_async(channel_username, limit=50):
    """Download photo messages from a Telegram channel and log them in PostgreSQL.

    Iterates over up to ``limit`` messages of ``channel_username``, saves each
    photo as ``images/<channel>/<message_id>.jpg`` and inserts one row per
    photo into the ``images`` table. Every successfully stored image is
    committed on its own, so an error later in the run does not discard the
    rows for images that are already on disk.

    Args:
        channel_username: Channel handle passed to Telethon (e.g. "@name").
            Leading/trailing "@" characters are stripped for the directory name.
        limit: Maximum number of messages to iterate over; the photo filter is
            applied afterwards, so fewer images may be saved.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    client = TelegramClient(session_name, api_id, api_hash)
    conn = None
    cur = None

    try:
        await client.start()
        print(f"Connected to Telegram. Scraping images from {channel_username}...")

        conn = get_pgsql_connection()
        if conn is None:
            # No database available; the finally block still disconnects the
            # Telegram client so the session is not left open.
            return
        cur = conn.cursor()

        save_dir = os.path.join("images", channel_username.strip("@"))
        os.makedirs(save_dir, exist_ok=True)

        try:
            async for message in client.iter_messages(channel_username, limit=limit):
                # isinstance() is already False for None, so no separate truthiness check.
                if not isinstance(message.media, MessageMediaPhoto):
                    continue

                try:
                    file_name = f"{message.id}.jpg"
                    file_path = os.path.join(save_dir, file_name)
                    saved_path = await client.download_media(message, file_path)
                    if saved_path is None:
                        # Nothing was written to disk; do not record a row for it.
                        print(f"No media downloaded for {message.id}; skipping.")
                        continue

                    cur.execute("""
                        INSERT INTO images (message_id, channel_name,
file_name, file_path, posted_at)
                        VALUES (%s, %s, %s, %s, %s)
                        ON CONFLICT DO NOTHING
                    """, (
                        message.id,
                        channel_username,
                        file_name,
                        file_path,
                        message.date
                    ))
                    # Commit per image so earlier rows survive a later failure.
                    conn.commit()

                    print(f"Saved image: {file_name}")
                except Exception as insert_err:
                    # A failed statement leaves the transaction aborted; roll back
                    # so the remaining messages can still be inserted.
                    conn.rollback()
                    print(f"Error inserting {message.id}: {insert_err}")
        except Exception as scrape_err:
            print(f"Scraping failed: {scrape_err}")
    finally:
        # Release resources in every exit path, including the early return.
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()
        await client.disconnect()
        print("Disconnected from Telegram.")

In [5]:
# Scrape photos from up to 30 messages of the channel. The bare top-level
# `await` needs an environment that allows it (e.g. an IPython/Jupyter cell).
await scrape_images_async("@lobelia4cosmetics", limit=30)

Connected to Telegram. Scraping images from @lobelia4cosmetics...
Saved image: 18570.jpg
Saved image: 18569.jpg
Saved image: 18568.jpg
Saved image: 18567.jpg
Saved image: 18566.jpg
Saved image: 18565.jpg
Saved image: 18564.jpg
Saved image: 18563.jpg
Saved image: 18562.jpg
Saved image: 18561.jpg
Saved image: 18560.jpg
Saved image: 18559.jpg
Saved image: 18558.jpg
Saved image: 18557.jpg
Saved image: 18556.jpg
Saved image: 18555.jpg
Saved image: 18554.jpg
Saved image: 18553.jpg
Saved image: 18552.jpg
Saved image: 18551.jpg
Saved image: 18550.jpg
Saved image: 18549.jpg
Saved image: 18548.jpg
Saved image: 18547.jpg
Saved image: 18546.jpg
Saved image: 18545.jpg
Saved image: 18544.jpg
Saved image: 18543.jpg
Saved image: 18542.jpg
Saved image: 18541.jpg
Disconnected from Telegram.
