In [1]:
import csv
import io
import os
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
from PIL import Image

def scrape_and_convert_gifs(url, csv_writer, save_folder, timeout=30):
    """Scrape one page for GIF images, convert each to JPEG, and log it to CSV.

    Fetches ``url``, walks every ``<img>`` tag in the returned HTML, and for
    each ``src`` ending in ``.gif`` calls ``convert_gif_to_jpeg``. For every
    successful conversion one row is written to ``csv_writer``:
    ``[gif_url, absolute JPEG path, alt text]``.

    Args:
        url: Page URL to scrape.
        csv_writer: Object with a ``writerow`` method (e.g. ``csv.writer``).
        save_folder: Directory passed through to ``convert_gif_to_jpeg``.
        timeout: Seconds to wait for the page request before giving up.

    Returns:
        None. Failures to fetch the page are printed, not raised, so that a
        caller looping over several URLs can continue with the next one.
    """
    base_url = "https://tenor.com"
    headers = {"User-Agent": "Mozilla/5.0"}

    # Without a timeout requests can block forever on a stalled connection;
    # network errors are reported here instead of aborting the caller's loop.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as e:
        print(f"Failed to fetch {url}. Error: {e}")
        return

    if response.status_code != 200:
        print(f"Failed to fetch {url}. Status code: {response.status_code}")
        return

    soup = BeautifulSoup(response.content, "html.parser")
    for img in soup.find_all("img"):
        gif_url = img.get("src")
        alt_text = img.get("alt", "No description")

        # Process only `.gif` files
        if not gif_url or not gif_url.endswith(".gif"):
            continue

        # Relative and protocol-relative sources are resolved against the site root
        if not gif_url.startswith("http"):
            gif_url = urljoin(base_url, gif_url)

        print(f"Processing: {gif_url}")
        # Convert .gif to .jpeg and save to the folder
        jpeg_file_path = convert_gif_to_jpeg(gif_url, save_folder)
        if jpeg_file_path:
            # Write the .gif URL, .jpeg file path, and alt text to the CSV
            csv_writer.writerow([gif_url, os.path.abspath(jpeg_file_path), alt_text])

def convert_gif_to_jpeg(gif_url, save_folder, timeout=30):
    """Download a GIF and save it as a JPEG inside ``save_folder``.

    The image is opened with PIL and converted to RGB before being saved as
    JPEG. The output file name is the last path segment of ``gif_url`` with
    its extension replaced by ``.jpeg``; any query string or fragment on the
    URL is ignored when building the name.

    Args:
        gif_url: Absolute URL of the GIF to download.
        save_folder: Existing directory to write the JPEG into.
        timeout: Seconds to wait for the download before giving up.

    Returns:
        The path of the saved JPEG, or ``None`` if the download or the
        conversion failed (the failure is printed, never raised).
    """
    try:
        # The whole body is needed in memory, so streaming is not used: a
        # streamed response that is never read keeps its connection checked out.
        response = requests.get(gif_url, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to download GIF: {gif_url}")
            return None

        # Generate .jpeg file name from the URL path only (no query/fragment),
        # swapping just the final extension rather than every ".gif" substring.
        gif_name = urlsplit(gif_url).path.rsplit("/", 1)[-1]
        stem, _ = os.path.splitext(gif_name)
        if not stem:
            print(f"Failed to download GIF: {gif_url}")
            return None
        jpeg_name = stem + ".jpeg"

        # Load GIF into memory and convert to RGB for .jpeg format
        with Image.open(io.BytesIO(response.content)) as gif_image:
            jpeg_image = gif_image.convert("RGB")

        # Save the .jpeg in the specified folder
        jpeg_file_path = os.path.join(save_folder, jpeg_name)
        jpeg_image.save(jpeg_file_path, "JPEG")
        print(f"Saved JPEG: {jpeg_file_path}")
        return jpeg_file_path
    except Exception as e:
        # Best-effort by design: one bad image must not stop the whole scrape.
        print(f"Error converting GIF: {e}")
        return None

# Main script
# Pages to scrape; each one is processed independently and appended to one CSV.
urls = [
    "https://tenor.com/en-IN/search/sunil-memes-gifs",
    # Add more URLs as needed...
]

# Specify the folder to save JPEG files
save_folder = input("Enter the full path of the folder to save JPEG files: ").strip()
if not save_folder:
    # os.makedirs("") raises FileNotFoundError, so an empty answer falls back
    # to the current working directory instead of crashing the cell.
    save_folder = os.getcwd()
    print(f"No folder entered; using current directory: {save_folder}")

# Ensure the specified folder exists
os.makedirs(save_folder, exist_ok=True)

# Path to the CSV file
csv_file = os.path.join(save_folder, "gif_to_jpeg_links.csv")

try:
    # newline='' lets the csv module control line endings itself
    with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
        csv_writer = csv.writer(file)
        csv_writer.writerow(["Original GIF URL", "Converted JPEG File Path", "Image Alt Text"])  # Header

        for url in urls:
            print(f"\nScraping images from: {url}")
            scrape_and_convert_gifs(url, csv_writer, save_folder)
            print("\n" + "-" * 100 + "\n")

    print(f"Data has been written to {csv_file}")
except Exception as e:
    print(f"An error occurred while writing to CSV: {e}")

Enter the full path of the folder to save JPEG files:  test1



Scraping images from: https://tenor.com/en-IN/search/sunil-memes-gifs
Processing: https://media.tenor.com/_a6iQf-mhIwAAAAM/sunil-sontham.gif
Saved JPEG: test1\sunil-sontham.jpeg
Processing: https://media.tenor.com/JMGSNl_NPZEAAAAM/papam-feel-ayinattu-unnadu.gif
Saved JPEG: test1\papam-feel-ayinattu-unnadu.jpeg
Processing: https://media.tenor.com/-j1bIVeF-zsAAAAM/telugu-gifs.gif
Saved JPEG: test1\telugu-gifs.jpeg
Processing: https://media.tenor.com/CvA4r4voLkwAAAAM/sunil.gif
Saved JPEG: test1\sunil.jpeg
Processing: https://media.tenor.com/vzC7WgnH1sIAAAAM/sunil-telugu-andharivadu-comedy-chala-worst-ga-janaala-talk.gif
Saved JPEG: test1\sunil-telugu-andharivadu-comedy-chala-worst-ga-janaala-talk.jpeg
Processing: https://media.tenor.com/HLeUlh7FKkIAAAAM/sunil-comedy.gif
Saved JPEG: test1\sunil-comedy.jpeg
Processing: https://media.tenor.com/-M8uk72qHmMAAAAM/introverts-to-modi-speech-sunil.gif
Saved JPEG: test1\introverts-to-modi-speech-sunil.jpeg
Processing: https://media.tenor.com/AvMls