In [11]:
from PIL import Image
import os
import logging
from multiprocessing import Pool

def resize_and_compress_image(input_path, output_path, target_size_kb):
    """Write an RGB copy of an image, shrinking it until the file fits a size budget.

    The image is first saved at full resolution. While the saved file is
    larger than ``target_size_kb`` kilobytes, it is re-saved at 90% of the
    previous scale. Every downscale starts from the full-resolution image so
    resampling loss does not compound across iterations.

    Args:
        input_path: Path of the image to read.
        output_path: Path the result is written to. The file is overwritten
            on every iteration; the last write is the final result.
        target_size_kb: Maximum desired output size in kilobytes
            (1 KB = 1024 bytes).

    Returns:
        None. Any exception is logged via ``logging.error`` rather than
        raised, so a single bad file does not abort a batch run.
    """
    try:
        with Image.open(input_path) as source:
            # Convert to RGB (e.g. from RGBA or palette mode). convert()
            # returns a new, fully loaded image, so the source file can be
            # closed as soon as this block exits.
            base = source.convert("RGB")

        target_bytes = target_size_kb * 1024
        full_width, full_height = base.size
        scale = 1.0
        candidate = base

        while True:
            # Save first, measure afterwards: the output file does not exist
            # before the first save, so its size cannot be checked up front.
            candidate.save(output_path, optimize=True)
            if os.path.getsize(output_path) <= target_bytes:
                break

            if candidate.size == (1, 1):
                # Cannot shrink any further; keep the smallest result.
                logging.warning(
                    f"Could not reduce {input_path} below {target_size_kb} KB"
                )
                break

            scale *= 0.9
            # Clamp to 1 pixel so resize() never receives a zero dimension.
            new_size = (
                max(1, int(full_width * scale)),
                max(1, int(full_height * scale)),
            )
            candidate = base.resize(new_size)
    except Exception as e:
        logging.error(f"Error processing {input_path}: {e}")

def resize_and_compress_images(input_dir, output_dir, target_size_kb, batch_size=100):
    """Resize and compress every PNG in ``input_dir`` into ``output_dir`` in parallel.

    Files are matched by a case-insensitive ``.png`` suffix and written to
    ``output_dir`` under the same file name. Work is submitted to a process
    pool in batches so that progress can be logged after each batch.

    Args:
        input_dir: Directory scanned (non-recursively) for PNG files.
        output_dir: Destination directory; created if it does not exist.
        target_size_kb: Size budget in kilobytes passed through to
            ``resize_and_compress_image`` for each file.
        batch_size: Number of images submitted to the pool per batch.
            Must be at least 1.

    Returns:
        None.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # Zero would divide by zero below; a negative value would yield an
        # empty batch range and silently process nothing.
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    # Sort so that batch contents are reproducible; os.listdir order is arbitrary.
    image_files = sorted(
        name
        for name in os.listdir(input_dir)
        if name.lower().endswith('.png')
        and os.path.isfile(os.path.join(input_dir, name))
    )

    num_images = len(image_files)
    num_batches = (num_images + batch_size - 1) // batch_size  # ceiling division

    logging.info(f"Processing {num_images} images in {num_batches} batches")

    if not image_files:
        # Nothing to do; skip starting worker processes entirely.
        return

    # One pool for the whole run: starting and stopping worker processes for
    # every batch is pure overhead.
    with Pool() as pool:
        for i in range(num_batches):
            batch_files = image_files[i * batch_size: (i + 1) * batch_size]

            # One (input_path, output_path, target_size_kb) tuple per image.
            resize_args = [
                (
                    os.path.join(input_dir, image_file),
                    os.path.join(output_dir, image_file),
                    target_size_kb,
                )
                for image_file in batch_files
            ]

            pool.starmap(resize_and_compress_image, resize_args)

            logging.info(f"Processed batch {i + 1}/{num_batches}")

if __name__ == "__main__":
    # Timestamped INFO-level logging so per-batch progress is visible.
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(message)s",
        level=logging.INFO,
    )

    # Job settings: source folder, destination folder, per-file size budget (KB).
    input_folder = "symbols"
    output_folder = "resized_symbols1"
    target_size_kb = 10

    resize_and_compress_images(
        input_dir=input_folder,
        output_dir=output_folder,
        target_size_kb=target_size_kb,
    )
    logging.info("Image resizing and compression complete.")


2024-06-09 13:38:50,712 - INFO - Processing 12599 images in 126 batches


In [2]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the pretrained GPT-2 tokenizer and language-model head.
# NOTE: GPT2LMHeadModel and return_tensors='pt' both require PyTorch to be
# installed; with only TensorFlow available the import of GPT2LMHeadModel fails.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

input_text = "Your input text here"

# Calling the tokenizer directly returns the attention mask alongside the
# input ids, so generate() does not have to guess which positions are padding.
encoded = tokenizer(input_text, return_tensors='pt')
inputs = encoded["input_ids"]

# GPT-2 defines no pad token; pass EOS explicitly as the pad id instead of
# relying on generate()'s implicit fallback (which emits a warning).
outputs = model.generate(
    inputs,
    attention_mask=encoded["attention_mask"],
    max_length=150,
    pad_token_id=tokenizer.eos_token_id,
)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)


  from .autonotebook import tqdm as notebook_tqdm


ImportError: 
GPT2LMHeadModel requires the PyTorch library but it was not found in your environment.
However, we were able to find a TensorFlow installation. TensorFlow classes begin
with "TF", but are otherwise identically named to our PyTorch classes. This
means that the TF equivalent of the class you tried to import would be "TFGPT2LMHeadModel".
If you want to use TensorFlow, please use TF classes instead!

If you really do want to use PyTorch please go to
https://pytorch.org/get-started/locally/ and follow the instructions that
match your environment.
