In [2]:
import pandas as pd
import os
import math
from PIL import Image

In [3]:
def create_cluster_grids(
    csv_path="../text_clusters/df_cluster_1.csv",
    output_dir="../Text_pic/cluster_1",
    thumbnails_dir="../../../Thumbnails_New_Data/thumbnails-filtered",
    thumb_size=200,
    margin=10,
    max_images_per_grid=50,
    n_cols=5,
):
    """Render thumbnail contact sheets (grid PNGs) for every cluster in a CSV.

    The CSV is read with pandas and must contain a ``cluster`` column and a
    ``Video ID`` column. For each distinct cluster value (in sorted order) the
    thumbnails ``<thumbnails_dir>/<Video ID>.jpg`` are resized to
    ``thumb_size`` x ``thumb_size`` and pasted row-major onto a white canvas
    with ``n_cols`` columns. Clusters with more than ``max_images_per_grid``
    rows are split over several grids.

    Output files are written to ``output_dir`` (created if missing) as
    ``cluster_<id>.png`` when a cluster fits in one grid, otherwise as
    ``cluster_<id>.<k>.png`` with ``k`` starting at 1. One progress line is
    printed per grid written.

    Parameters
    ----------
    csv_path : str
        Path of the clustered CSV file.
    output_dir : str
        Directory that receives the grid PNGs.
    thumbnails_dir : str
        Directory holding the source ``<Video ID>.jpg`` thumbnails.
    thumb_size : int
        Edge length, in pixels, of each (square) thumbnail cell.
    margin : int
        Gap, in pixels, between cells and around the grid border.
    max_images_per_grid : int
        Maximum number of thumbnails placed on a single grid image.
    n_cols : int
        Number of columns in every grid.

    Raises
    ------
    ValueError
        If ``thumb_size``, ``max_images_per_grid`` or ``n_cols`` is not
        positive, or ``margin`` is negative.
    KeyError
        If the CSV lacks the ``cluster`` or ``Video ID`` column.
    """
    # Validate up front: zero values would otherwise surface later as a
    # ZeroDivisionError or as degenerate/empty canvases.
    if thumb_size <= 0 or max_images_per_grid <= 0 or n_cols <= 0:
        raise ValueError(
            "thumb_size, max_images_per_grid and n_cols must be positive"
        )
    if margin < 0:
        raise ValueError("margin must be non-negative")

    df = pd.read_csv(csv_path)
    missing = {"cluster", "Video ID"} - set(df.columns)
    if missing:
        raise KeyError(
            f"{csv_path} is missing required column(s): {sorted(missing)}"
        )

    os.makedirs(output_dir, exist_ok=True)

    # A single groupby pass replaces one full boolean scan per cluster;
    # sort=True keeps the clusters in ascending order.
    for cluster_id, cluster_rows in df.groupby("cluster", sort=True):
        video_ids = cluster_rows["Video ID"].tolist()
        num_grids = math.ceil(len(video_ids) / max_images_per_grid)

        for grid_num in range(num_grids):
            start = grid_num * max_images_per_grid
            batch = video_ids[start:start + max_images_per_grid]
            grid_image = _render_thumbnail_grid(
                batch, thumbnails_dir, thumb_size, margin, n_cols
            )

            # Only add the ".<k>" suffix when the cluster spans several grids.
            if num_grids == 1:
                filename = f"cluster_{cluster_id}.png"
            else:
                filename = f"cluster_{cluster_id}.{grid_num + 1}.png"
            output_path = os.path.join(output_dir, filename)

            grid_image.save(output_path)
            print(f"Created {output_path} with {len(batch)} images")


def _render_thumbnail_grid(video_ids, thumbnails_dir, thumb_size, margin, n_cols):
    """Paste the thumbnails for ``video_ids`` row-major onto one white RGB canvas."""
    n_rows = math.ceil(len(video_ids) / n_cols)
    cell = thumb_size + margin  # distance between the origins of adjacent cells
    grid_image = Image.new(
        "RGB", (n_cols * cell + margin, n_rows * cell + margin), "white"
    )

    for idx, video_id in enumerate(video_ids):
        row_idx, col_idx = divmod(idx, n_cols)
        position = (margin + col_idx * cell, margin + row_idx * cell)
        try:
            # Build the path inside the try and via an f-string so a NaN or
            # numeric ID is reported like any other bad thumbnail instead of
            # aborting the whole run with a TypeError.
            thumb_path = os.path.join(thumbnails_dir, f"{video_id}.jpg")
            # The context manager closes the source file deterministically.
            with Image.open(thumb_path) as thumb:
                grid_image.paste(thumb.resize((thumb_size, thumb_size)), position)
        except Exception as e:
            # Deliberately best-effort: a missing or corrupt thumbnail leaves
            # its cell blank and the rest of the grid is still produced.
            print(f"Error processing {video_id}: {e}")
    return grid_image

# Build and save the thumbnail grid PNGs for every cluster in the CSV.
create_cluster_grids()

Created ../Text_pic/cluster_1\cluster_1.1.png with 50 images
Created ../Text_pic/cluster_1\cluster_1.2.png with 50 images
Created ../Text_pic/cluster_1\cluster_1.3.png with 50 images
Created ../Text_pic/cluster_1\cluster_1.4.png with 50 images
Created ../Text_pic/cluster_1\cluster_1.5.png with 50 images
Created ../Text_pic/cluster_1\cluster_1.6.png with 50 images
Created ../Text_pic/cluster_1\cluster_1.7.png with 50 images
Created ../Text_pic/cluster_1\cluster_1.8.png with 50 images
Created ../Text_pic/cluster_1\cluster_1.9.png with 50 images
Created ../Text_pic/cluster_1\cluster_1.10.png with 50 images
Created ../Text_pic/cluster_1\cluster_1.11.png with 50 images
Created ../Text_pic/cluster_1\cluster_1.12.png with 8 images
