Install OpenCV and Scikit-learn in Google Colab

In [None]:
pip install opencv-python scikit-learn

In [None]:
pip install openpyxl

In [None]:
pip install gdown

Mount Google Drive so the notebook can read the thumbnail images stored there.

In [None]:
# Mount Google Drive at /content/drive; later cells read the thumbnails from a folder under this mount point
from google.colab import drive
drive.mount('/content/drive')

## Color Extraction and CSV file

**Color Extraction and CSV file**

Extracts the colors and the top 5 dominant colors from images and saves the information into a CSV file.

Structure of the CSV file:
*   **Name**: The image name.
*   **Top 5 Dominant Colors**: The 5 most dominant colors in the image
*   **Top 5 Dominant Colors Percentage**: The 5 most dominant colors in the image, sorted by percentage.




In [None]:
import cv2
import numpy as np
from sklearn.cluster import KMeans
import os
import csv
import matplotlib.pyplot as plt
from google.colab import drive  # For Google Drive access

# Mount Google Drive at /content/drive; image_folder below points inside this mount
drive.mount('/content/drive')

# Function to extract dominant colors from an image
def extract_dominant_colors(image_path, k=5, random_state=0):
    """Find the k dominant colors of an image with KMeans clustering.

    Args:
        image_path: Path of the image file to analyse.
        k: Number of color clusters to extract.
        random_state: Seed passed to KMeans so repeated runs on the same
            image return the same colors.

    Returns:
        A list of (color, percentage) tuples sorted by descending percentage.
        color is an integer RGB array (a cluster center); percentage is the
        share of pixels assigned to that cluster, in percent.

    Raises:
        ValueError: If the image cannot be read from image_path.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when the file is missing or unreadable
        raise ValueError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    pixels = image.reshape(-1, 3)

    # KMeans clustering to find k dominant colors
    kmeans = KMeans(n_clusters=k, random_state=random_state)
    kmeans.fit(pixels)

    # Cluster centers are the dominant colors; labels assign each pixel to one
    dominant_colors = kmeans.cluster_centers_.astype(int)
    labels = kmeans.labels_

    # minlength keeps one count per cluster even when the highest-numbered
    # clusters received no pixels (e.g. an image with fewer than k colors)
    label_counts = np.bincount(labels, minlength=k)

    # Share of pixels per cluster, in percent
    total_pixels = len(labels)
    color_percentages = [(label_counts[i] / total_pixels) * 100 for i in range(k)]

    # Most frequent color first
    ranked_colors = sorted(zip(dominant_colors, color_percentages), key=lambda x: -x[1])

    return ranked_colors

# Function to display the dominant colors as a palette
def display_palette(dominant_colors, title):
    """Draw the ranked colors as a horizontal strip of equal-width swatches.

    dominant_colors is a sequence of (color, percentage) pairs; only the color
    of each pair is drawn. title becomes the plot title.
    """
    swatch_width = 300 // len(dominant_colors)
    strip = np.zeros((50, 300, 3), dtype='uint8')

    # Paint the swatches left to right
    left_edge = 0
    for rgb, _share in dominant_colors:
        strip[:, left_edge:left_edge + swatch_width] = rgb
        left_edge += swatch_width

    plt.figure(figsize=(8, 2))
    plt.imshow(strip)
    plt.title(title)
    plt.axis('off')
    plt.show()

# Folder in Google Drive that holds the thumbnails to analyse
image_folder = '/content/drive/MyDrive/Thumbnails'  # Replace with your folder path in Drive

# Keep only the directory entries that end with a supported image extension
image_files = []
for entry in os.listdir(image_folder):
    if entry.endswith(('.png', '.jpg', '.jpeg')):
        image_files.append(entry)

# One CSV row per image: name, ranked colors, ranked colors with percentages
with open('image_colors.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Name', 'Top 5 Dominant Colors', 'Top 5 Dominant Colors Percentage'])

    for image_file in image_files:
        image_path = os.path.join(image_folder, image_file)
        dominant_colors = extract_dominant_colors(image_path, k=5)

        # Build both CSV columns in a single pass over the ranked colors
        colors_list = []
        percentages_list = []
        for color, percentage in dominant_colors:
            rgb = color.tolist()
            colors_list.append(rgb)
            percentages_list.append(f"{rgb} , {round(percentage, 2)}%")

        writer.writerow([image_file, colors_list, ', '.join(percentages_list)])

        # Show the palette of the image that was just processed
        display_palette(dominant_colors, f"Top 5 Dominant Colors for {image_file}")

print("Data has been saved to 'image_colors.csv'.")


# For Searching the Colors of the Thumbnails

In [None]:
import csv
import numpy as np
import matplotlib.pyplot as plt

# Function to display the dominant colors as a palette
def display_palette(dominant_colors, title):
    """Plot the given RGB colors as equal-width swatches in one horizontal strip.

    dominant_colors is a sequence of RGB colors; title becomes the plot title.
    """
    swatch_width = 300 // len(dominant_colors)
    strip = np.zeros((50, 300, 3), dtype='uint8')

    # Fill the strip one swatch at a time, left to right
    left_edge = 0
    for rgb in dominant_colors:
        right_edge = left_edge + swatch_width
        strip[:, left_edge:right_edge] = rgb
        left_edge = right_edge

    plt.figure(figsize=(8, 2))
    plt.imshow(strip)
    plt.title(title)
    plt.axis('off')
    plt.show()

# Function to search for an image by its ID (name) and display its colors
def search_image_by_id(image_id, csv_file_path):
    """Look up an image in the colors CSV and display its dominant-color palette.

    Args:
        image_id: Image file name, matched against the first CSV column.
        csv_file_path: Path of the CSV file; the second column is parsed as a
            list of [R, G, B] lists.

    Prints a message when no row matches image_id.
    """
    import ast  # local import: only needed to parse the stored color list

    # newline='' is the csv module's recommended way to open files for reading
    with open(csv_file_path, mode='r', newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip the header row (tolerates an empty file)

        for row in reader:
            # csv.reader yields [] for blank lines, so guard before indexing
            if row and row[0] == image_id:
                # Parse the stored "[[r, g, b], ...]" literal instead of
                # splitting the string on brackets by hand
                dominant_colors = [list(map(int, color)) for color in ast.literal_eval(row[1])]

                # Display the palette for the found image
                display_palette(dominant_colors, f"Dominant Colors for {image_id}")
                return

        print(f"Image ID {image_id} not found.")

# Example usage: point csv_file_path at your colors CSV and pass the image ID (file name) to look up
csv_file_path = 'image_colors (sept 24, 2024).csv'  # Replace with the path to your CSV file
search_image_by_id('4XdkQzkWRR4.jpg', csv_file_path)  # Replace with the image ID you want to search


# For Searching the Colors and Showing the Thumbnails

In [None]:
import csv
import numpy as np
import matplotlib.pyplot as plt
import cv2
from google.colab import drive

# Mount Google Drive at /content/drive (the image_folder used below lives under this mount)
drive.mount('/content/drive')

# Function to display the dominant colors as a palette
def display_palette(dominant_colors):
    """Build a 50x300 RGB strip with one equal-width swatch per color.

    dominant_colors is a sequence of RGB colors. Columns left over when 300 is
    not divisible by the number of colors stay black. Returns the uint8 array.
    """
    swatch_width = 300 // len(dominant_colors)
    strip = np.zeros((50, 300, 3), dtype='uint8')

    # Paint the swatches left to right
    left_edge = 0
    for rgb in dominant_colors:
        strip[:, left_edge:left_edge + swatch_width] = rgb
        left_edge += swatch_width

    return strip

# Function to load and display the image alongside its color palette
def display_image_and_colors(image_path, dominant_colors):
    """Show an image side by side with the palette of its dominant colors.

    Args:
        image_path: Path of the image file to display (also printed).
        dominant_colors: RGB colors handed to display_palette to build the strip.

    Raises:
        ValueError: If the image cannot be read from image_path.
    """
    # Read the image
    img = cv2.imread(image_path)
    print(image_path)
    if img is None:
        # cv2.imread returns None instead of raising when the file is missing or unreadable
        raise ValueError(f"Could not read image: {image_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert from BGR to RGB for display

    # Get the color palette
    palette = display_palette(dominant_colors)

    # Create a plot to display both image and color palette
    fig, axs = plt.subplots(1, 2, figsize=(12, 6))

    # Display the image
    axs[0].imshow(img)
    axs[0].set_title('Image')
    axs[0].axis('off')

    # Display the color palette
    axs[1].imshow(palette)
    axs[1].set_title('Dominant Colors')
    axs[1].axis('off')

    plt.show()

# Function to search for an image by its ID (name) and display its colors
def search_image_by_id(image_id, csv_file_path, image_folder):
    """Look up an image in the colors CSV and show it next to its palette.

    Args:
        image_id: Image file name, matched against the first CSV column.
        csv_file_path: Path of the CSV file; the second column is parsed as a
            list of [R, G, B] lists.
        image_folder: Folder that contains the image file named image_id.

    Prints a message when no row matches image_id.
    """
    import ast  # local import: only needed to parse the stored color list

    # newline='' is the csv module's recommended way to open files for reading
    with open(csv_file_path, mode='r', newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip the header row (tolerates an empty file)

        for row in reader:
            # csv.reader yields [] for blank lines, so guard before indexing
            if row and row[0] == image_id:
                # Parse the stored "[[r, g, b], ...]" literal instead of
                # splitting the string on brackets by hand
                dominant_colors = [list(map(int, color)) for color in ast.literal_eval(row[1])]

                # Display the image and its color palette
                image_path = f"{image_folder}/{image_id}"  # Construct the full image path
                display_image_and_colors(image_path, dominant_colors)
                return

        print(f"Image ID {image_id} not found.")


# Example usage: look up one thumbnail by file name and show it next to its palette
csv_file_path = '/content/image_colors (Oct 9, 2024).csv'  # Replace with the path to your CSV file
image_folder = '/content/drive/MyDrive/Thesis/thumbnails'  # Replace with the folder where your images are stored
search_image_by_id('_GT_jJZhO-A.jpg', csv_file_path, image_folder)  # Replace with the image ID you want to search


# For clustering all the colors (csv)

In [None]:
import csv
import numpy as np
from sklearn.cluster import KMeans
import pandas as pd
import matplotlib.pyplot as plt

# Function to extract dominant colors using KMeans
def extract_dominant_colors(rgb_data, k=5, random_state=0):
    """Cluster a collection of RGB colors and return the k cluster centers.

    Args:
        rgb_data: Sequence of RGB triples to cluster.
        k: Number of clusters (dominant colors) to extract.
        random_state: Seed passed to KMeans so repeated runs on the same data
            return the same colors.

    Returns:
        Integer array with one row per cluster center.

    Raises:
        ValueError: If rgb_data is empty.
    """
    pixels = np.array(rgb_data)
    if pixels.size == 0:
        raise ValueError("rgb_data is empty; nothing to cluster")

    # Apply KMeans clustering
    kmeans = KMeans(n_clusters=k, random_state=random_state)
    kmeans.fit(pixels)

    # Get cluster centers (dominant colors)
    dominant_colors = kmeans.cluster_centers_.astype(int)
    return dominant_colors

import ast  # parses the stored color lists without executing code

# Load the CSV file with RGB data
csv_file_path = '/content/image_colors (sept 24, 2024).csv'
df = pd.read_csv(csv_file_path)

# Concatenate the per-image color lists (second CSV column) into one list
all_rgb_data = []
for _, row in df.iterrows():
    # literal_eval only accepts Python literals, whereas eval would run any
    # code found in the CSV; .iloc makes the positional column access explicit
    rgb_data = ast.literal_eval(row.iloc[1])
    all_rgb_data.extend(rgb_data)

# Get the top 5 dominant colors from all data combined
dominant_colors = extract_dominant_colors(all_rgb_data, k=5)

# Write the result only after it has been computed, so a failure above does
# not leave a header-only output file behind
output_file_path = '/content/top_5_dominant_colors.csv'
with open(output_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)

    # Write the header row
    writer.writerow(['Top 5 Dominant Colors (RGB)'])

    # Write the top 5 dominant colors into the CSV
    for color in dominant_colors:
        writer.writerow([color.tolist()])

# Display the top 5 dominant colors as a palette
def display_palette(dominant_colors):
    """Plot the given RGB colors as equal-width swatches titled "Top 5 Dominant Colors"."""
    swatch_width = 300 // len(dominant_colors)
    strip = np.zeros((50, 300, 3), dtype='uint8')

    # Each color fills the next swatch_width columns of the strip
    left_edge = 0
    for rgb in dominant_colors:
        strip[:, left_edge:left_edge + swatch_width] = rgb
        left_edge += swatch_width

    plt.figure(figsize=(8, 2))
    plt.imshow(strip)
    plt.title("Top 5 Dominant Colors")
    plt.axis('off')
    plt.show()

# Render the combined top-5 palette in the notebook output
display_palette(dominant_colors)

# Last expression of the cell: echoes the path the CSV was written to
output_file_path


# Extraction For almost 3k thumbnails

In [None]:
import cv2
import numpy as np
from sklearn.cluster import KMeans
import os
import csv
import matplotlib.pyplot as plt
from google.colab import drive  # For Google Drive access

# Mount Google Drive at /content/drive; image_folder below points inside this mount
drive.mount('/content/drive')

# Function to extract dominant colors from an image
def extract_dominant_colors(image_path, k=5, random_state=0):
    """Find the k dominant colors of an image with KMeans clustering.

    Args:
        image_path: Path of the image file to analyse.
        k: Number of color clusters to extract.
        random_state: Seed passed to KMeans so repeated runs on the same
            image return the same colors.

    Returns:
        A list of (color, percentage) tuples sorted by descending percentage.
        color is an integer RGB array (a cluster center); percentage is the
        share of pixels assigned to that cluster, in percent.

    Raises:
        ValueError: If the image cannot be read from image_path.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when the file is missing or unreadable
        raise ValueError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    pixels = image.reshape(-1, 3)

    # KMeans clustering to find k dominant colors
    kmeans = KMeans(n_clusters=k, random_state=random_state)
    kmeans.fit(pixels)

    # Cluster centers are the dominant colors; labels assign each pixel to one
    dominant_colors = kmeans.cluster_centers_.astype(int)
    labels = kmeans.labels_

    # minlength keeps one count per cluster even when the highest-numbered
    # clusters received no pixels (e.g. an image with fewer than k colors)
    label_counts = np.bincount(labels, minlength=k)

    # Share of pixels per cluster, in percent
    total_pixels = len(labels)
    color_percentages = [(label_counts[i] / total_pixels) * 100 for i in range(k)]

    # Most frequent color first
    ranked_colors = sorted(zip(dominant_colors, color_percentages), key=lambda x: -x[1])

    return ranked_colors

# Function to display the dominant colors as a palette
def display_palette(dominant_colors, title):
    """Render ranked (color, percentage) pairs as a strip of equal-width swatches.

    Only the color of each pair is drawn; title becomes the plot title.
    """
    swatch_width = 300 // len(dominant_colors)
    strip = np.zeros((50, 300, 3), dtype='uint8')

    # Walk the pairs in rank order, advancing one swatch width each time
    left_edge = 0
    for rgb, _share in dominant_colors:
        right_edge = left_edge + swatch_width
        strip[:, left_edge:right_edge] = rgb
        left_edge = right_edge

    plt.figure(figsize=(8, 2))
    plt.imshow(strip)
    plt.title(title)
    plt.axis('off')
    plt.show()

# Folder in Google Drive that holds the thumbnails to analyse
image_folder = '/content/drive/MyDrive/Thesis/thumbnails'  # Replace with your folder path in Drive

# Keep only the directory entries that end with a supported image extension
image_files = list(filter(lambda name: name.endswith(('.png', '.jpg', '.jpeg')), os.listdir(image_folder)))

# One CSV row per image: name, ranked colors, ranked colors with percentages
with open('image_colors.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Name', 'Top 5 Dominant Colors', 'Top 5 Dominant Colors Percentage'])

    for image_file in image_files:
        image_path = os.path.join(image_folder, image_file)
        dominant_colors = extract_dominant_colors(image_path, k=5)

        # Build both CSV columns in a single pass over the ranked colors
        colors_list = []
        percentages_list = []
        for color, percentage in dominant_colors:
            rgb = color.tolist()
            colors_list.append(rgb)
            percentages_list.append(f"{rgb} , {round(percentage, 2)}%")

        writer.writerow([image_file, colors_list, ', '.join(percentages_list)])

        # Show the palette of the image that was just processed
        display_palette(dominant_colors, f"Top 5 Dominant Colors for {image_file}")

print("Data has been saved to 'image_colors.csv'.")

# Color Profile

In [None]:
import cv2
import numpy as np
import os
import csv
import matplotlib.pyplot as plt

# Predefined color profile
# Reference RGB value (0-255 per channel) for each named color bucket
color_profile = dict(
    Yellow=np.array([255, 255, 0]),
    Orange=np.array([255, 165, 0]),
    Red=np.array([255, 0, 0]),
    Purple=np.array([128, 0, 128]),
    Blue=np.array([0, 0, 255]),
    Green=np.array([0, 255, 0]),
    Black=np.array([0, 0, 0]),
    White=np.array([255, 255, 255]),
)

# Function to calculate the proportion of each predefined color in an image
def calculate_color_proportions(image_path):
    """Return the share of pixels closest to each color in color_profile.

    Every pixel is assigned to the profile color with the smallest Euclidean
    distance in RGB space; ties go to the color listed first in color_profile.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        Dict mapping each color name in color_profile to the percentage
        (0-100) of the image's pixels assigned to it.

    Raises:
        ValueError: If the image cannot be read from image_path.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when the file is missing or unreadable
        raise ValueError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Signed integers so the channel differences below cannot wrap around
    pixels = image.reshape(-1, 3).astype(np.int64)

    names = list(color_profile.keys())

    # One column of squared distances per profile color, computed for all
    # pixels at once instead of looping over pixels in Python. Squared
    # distances rank the colors exactly like the distances themselves.
    squared_distances = np.column_stack([
        np.sum((pixels - color_profile[name]) ** 2, axis=1) for name in names
    ])
    closest = np.argmin(squared_distances, axis=1)
    counts = np.bincount(closest, minlength=len(names))

    total_pixels = len(pixels)
    color_proportions = {name: (int(count) / total_pixels) * 100 for name, count in zip(names, counts)}

    return color_proportions

# Function to display the color proportions as a bar chart with the thumbnail
def display_color_proportions_with_thumbnail(color_proportions, image_path, title):
    """Plot the color shares as a bar chart (left) next to the thumbnail (right).

    color_proportions maps color names (keys of color_profile) to percentages,
    image_path is the thumbnail to show, title is the chart title.
    """
    _, (chart_ax, thumb_ax) = plt.subplots(1, 2, figsize=(12, 5))

    # One bar per color, filled with that color; White gets a black outline
    # so it stays visible on the white background
    for name, share in color_proportions.items():
        outline = None
        if name == 'White':
            outline = 'black'
        chart_ax.bar(name, share, color=color_profile[name] / 255, edgecolor=outline)

    chart_ax.set_title(title)
    chart_ax.set_xlabel('Colors')
    chart_ax.set_ylabel('Proportion (%)')

    # OpenCV loads BGR; convert to RGB before handing the image to matplotlib
    thumbnail = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
    thumb_ax.imshow(thumbnail)
    thumb_ax.axis('off')

    plt.tight_layout()
    plt.show()

# Set the path to the folder containing the images
image_folder = r'C:\Users\yesha\Desktop\College\4th year\1st Semester\THS-ST1\thumbnail_extract\thumbnails'

image_files = [f for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]

with open('image_colors_proportions.csv', mode='w', newline='') as file:
    writer = csv.writer(file)

    # Build the header from color_profile so it always has exactly one column
    # per value written below (a hand-written header had drifted out of sync)
    writer.writerow(['Name'] + [f'{color} (%)' for color in color_profile.keys()])

    for image_file in image_files:
        image_path = os.path.join(image_folder, image_file)
        color_proportions = calculate_color_proportions(image_path)

        # Values are written in color_profile order, matching the header
        writer.writerow([image_file] + [round(color_proportions[color], 2) for color in color_profile.keys()])

        display_color_proportions_with_thumbnail(color_proportions, image_path, f"Color Proportions for {image_file}")

print("Data has been saved to 'image_colors_proportions.csv'.")


Fixed the white distribution in the CSV and changed Purple to Violet (for now). Only the first 100 thumbnails are processed for now.

In [None]:
import cv2
import numpy as np
import os
import csv
import matplotlib.pyplot as plt

# Predefined color profile
# Reference RGB value (0-255 per channel) for each named color bucket
color_profile = {
    name: np.array(rgb)
    for name, rgb in [
        ('Yellow', [255, 255, 0]),
        ('Orange', [255, 165, 0]),
        ('Red', [255, 0, 0]),
        ('Violet', [238, 130, 238]),
        ('Blue', [0, 0, 255]),
        ('Green', [0, 255, 0]),
        ('Black', [0, 0, 0]),
        ('White', [255, 255, 255]),
    ]
}

# Largest per-channel deviation from pure white that still counts as White
white_tolerance = 30  # Adjust as needed

# Function to calculate the proportion of each predefined color in an image
def calculate_color_proportions(image_path):
    """Return the share of pixels assigned to each color in color_profile.

    A pixel counts as White when every channel is within white_tolerance of
    pure white. Every other pixel is assigned to the non-white profile color
    with the smallest Euclidean distance in RGB space (ties go to the color
    listed first in color_profile).

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        Dict mapping each color name in color_profile to the percentage
        (0-100) of the image's pixels assigned to it.

    Raises:
        ValueError: If the image cannot be read from image_path.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when the file is missing or unreadable
        raise ValueError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Signed integers so the channel differences below cannot wrap around
    pixels = image.reshape(-1, 3).astype(np.int64)

    color_counts = {name: 0 for name in color_profile.keys()}

    # Channel-wise white test, evaluated for all pixels at once
    is_white = np.all(np.abs(pixels - color_profile['White']) <= white_tolerance, axis=1)
    color_counts['White'] = int(np.sum(is_white))

    # Nearest non-white color for the remaining pixels, vectorized instead of
    # looping over pixels in Python. Squared distances rank the colors exactly
    # like the distances themselves.
    other_names = [name for name in color_profile if name != 'White']
    remaining = pixels[~is_white]
    if len(remaining) > 0:
        squared_distances = np.column_stack([
            np.sum((remaining - color_profile[name]) ** 2, axis=1) for name in other_names
        ])
        closest = np.argmin(squared_distances, axis=1)
        for name, count in zip(other_names, np.bincount(closest, minlength=len(other_names))):
            color_counts[name] = int(count)

    total_pixels = len(pixels)
    color_proportions = {color: (count / total_pixels) * 100 for color, count in color_counts.items()}

    return color_proportions

# Function to display the color proportions as a bar chart with the thumbnail
def display_color_proportions_with_thumbnail(color_proportions, image_path, title):
    """Show a bar chart of the color shares beside the thumbnail itself.

    color_proportions maps color names (keys of color_profile) to percentages,
    image_path is the thumbnail to show, title is the chart title.
    """
    names = list(color_proportions.keys())
    shares = list(color_proportions.values())

    fig, axs = plt.subplots(1, 2, figsize=(12, 5))
    chart_ax = axs[0]
    thumb_ax = axs[1]

    # Bars are drawn one by one so only the White bar gets a black outline
    for name, share in zip(names, shares):
        outline = 'black' if name == 'White' else None
        chart_ax.bar(name, share, color=color_profile[name] / 255, edgecolor=outline)

    chart_ax.set_title(title)
    chart_ax.set_xlabel('Colors')
    chart_ax.set_ylabel('Proportion (%)')

    # OpenCV loads BGR; matplotlib expects RGB
    bgr = cv2.imread(image_path)
    thumb_ax.imshow(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
    thumb_ax.axis('off')

    plt.tight_layout()
    plt.show()

# Set the path to the folder containing the images
image_folder = r'C:\Users\yesha\Desktop\College\4th year\1st Semester\THS-ST1\thumbnail_extract\thumbnails'

image_files = [f for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]

# Limit to the first 100 thumbnails
image_files = image_files[:100]

with open('image_colors_proportions_100.csv', mode='w', newline='') as file:
    writer = csv.writer(file)

    # Build the header from color_profile so it always has exactly one column
    # per value written below (the hand-written header listed both Purple and
    # Violet, which shifted every later column)
    writer.writerow(['Name'] + [f'{color} (%)' for color in color_profile.keys()])

    for image_file in image_files:
        image_path = os.path.join(image_folder, image_file)
        color_proportions = calculate_color_proportions(image_path)

        # Values are written in color_profile order, matching the header
        writer.writerow([image_file] + [round(color_proportions[color], 2) for color in color_profile.keys()])

        display_color_proportions_with_thumbnail(color_proportions, image_path, f"Color Proportions for {image_file}")

print("Data has been saved to 'image_colors_proportions_100.csv'.")


Updated code for the 100-thumbnail run (v2): the CSV file now includes the white proportion.

In [None]:
import cv2
import numpy as np
import os
import csv
import matplotlib.pyplot as plt

# Predefined color profile
# Reference RGB value (0-255 per channel) for each named color bucket
color_profile = dict([
    ('Yellow', np.array([255, 255, 0])),
    ('Orange', np.array([255, 165, 0])),
    ('Red', np.array([255, 0, 0])),
    ('Violet', np.array([238, 130, 238])),
    ('Blue', np.array([0, 0, 255])),
    ('Green', np.array([0, 255, 0])),
    ('Black', np.array([0, 0, 0])),
    ('White', np.array([255, 255, 255])),
])

# Largest per-channel deviation from pure white that still counts as White
white_tolerance = 30  # Adjust as needed

# Function to calculate the proportion of each predefined color in an image
def calculate_color_proportions(image_path):
    """Return the share of pixels assigned to each color in color_profile.

    A pixel counts as White when every channel is within white_tolerance of
    pure white. Every other pixel is assigned to the non-white profile color
    with the smallest Euclidean distance in RGB space (ties go to the color
    listed first in color_profile).

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        Dict mapping each color name in color_profile to the percentage
        (0-100) of the image's pixels assigned to it.

    Raises:
        ValueError: If the image cannot be read from image_path.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when the file is missing or unreadable
        raise ValueError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Signed integers so the channel differences below cannot wrap around
    pixels = image.reshape(-1, 3).astype(np.int64)

    color_counts = {name: 0 for name in color_profile.keys()}

    # Vectorized white pixel detection
    is_white = np.all(np.abs(pixels - color_profile['White']) <= white_tolerance, axis=1)
    color_counts['White'] = int(np.sum(is_white))

    # Nearest non-white color for the remaining pixels, also vectorized (the
    # per-pixel Python loop dominated the run time). Squared distances rank
    # the colors exactly like the distances themselves.
    other_names = [name for name in color_profile if name != 'White']
    non_white_pixels = pixels[~is_white]
    if len(non_white_pixels) > 0:
        squared_distances = np.column_stack([
            np.sum((non_white_pixels - color_profile[name]) ** 2, axis=1) for name in other_names
        ])
        closest = np.argmin(squared_distances, axis=1)
        for name, count in zip(other_names, np.bincount(closest, minlength=len(other_names))):
            color_counts[name] = int(count)

    total_pixels = len(pixels)
    color_proportions = {color: (count / total_pixels) * 100 for color, count in color_counts.items()}

    return color_proportions

# Function to display the color proportions as a bar chart with the thumbnail
def display_color_proportions_with_thumbnail(color_proportions, image_path, title):
    """Draw the per-color percentages as bars and show the thumbnail beside them.

    color_proportions maps color names (keys of color_profile) to percentages,
    image_path is the thumbnail to show, title is the chart title.
    """
    _, (chart_ax, thumb_ax) = plt.subplots(1, 2, figsize=(12, 5))

    # Each bar is filled with its own color; only White needs an outline to
    # stand out from the white background
    for name in color_proportions:
        if name == 'White':
            outline = 'black'
        else:
            outline = None
        chart_ax.bar(name, color_proportions[name], color=color_profile[name] / 255, edgecolor=outline)

    chart_ax.set_title(title)
    chart_ax.set_xlabel('Colors')
    chart_ax.set_ylabel('Proportion (%)')

    # OpenCV loads BGR; convert before displaying with matplotlib
    thumbnail = cv2.imread(image_path)
    thumbnail = cv2.cvtColor(thumbnail, cv2.COLOR_BGR2RGB)
    thumb_ax.imshow(thumbnail)
    thumb_ax.axis('off')

    plt.tight_layout()
    plt.show()

# Folder that holds the thumbnails to analyse
image_folder = r'C:\Users\yesha\Desktop\College\4th year\1st Semester\THS-ST1\thumbnail_extract\thumbnails'

# Supported image files, capped at the first 100 entries returned by os.listdir
image_files = list(filter(lambda name: name.endswith(('.png', '.jpg', '.jpeg')), os.listdir(image_folder)))[:100]

with open('image_colors_proportions_100v2.0.csv', mode='w', newline='') as file:
    writer = csv.writer(file)

    writer.writerow(['Name', 'Yellow (%)', 'Orange (%)', 'Red (%)', 'Violet (%)', 'Blue (%)', 'Green (%)', 'Black (%)', 'White (%)'])

    for image_file in image_files:
        image_path = os.path.join(image_folder, image_file)
        color_proportions = calculate_color_proportions(image_path)

        # One rounded percentage per profile color, in color_profile order
        csv_row = [image_file]
        for color in color_profile.keys():
            csv_row.append(round(color_proportions[color], 2))
        writer.writerow(csv_row)

        display_color_proportions_with_thumbnail(color_proportions, image_path, f"Color Proportions for {image_file}")

print("Data has been saved to 'image_colors_proportions_100v2.0.csv'.")


## get rid of violet in the color profile for now

In [None]:
import cv2
import numpy as np
import os
import csv
import matplotlib.pyplot as plt

# Predefined color profile
# Reference RGB value (0-255 per channel) for each named color bucket
color_profile = dict(
    Yellow=np.array([255, 255, 0]),
    Orange=np.array([255, 165, 0]),
    Red=np.array([255, 0, 0]),
    Violet=np.array([238, 130, 238]),
    Blue=np.array([0, 0, 255]),
    Green=np.array([0, 255, 0]),
    Black=np.array([0, 0, 0]),
    White=np.array([255, 255, 255]),
)

# Squared length of every profile color, precomputed once
color_profile_squared = {}
for name, color in color_profile.items():
    color_profile_squared[name] = np.sum(np.square(color))

# Squared radius around pure white that still counts as White
# (kept squared so comparisons need no square root)
white_tolerance_squared = 30 * 30

def calculate_color_proportions(image_path):
    """Return the share of pixels assigned to each color in color_profile.

    A pixel counts as White when its squared Euclidean distance to pure white
    is at most white_tolerance_squared. Every other pixel is assigned to the
    non-white profile color with the smallest squared Euclidean distance in
    RGB space (ties go to the color listed first in color_profile).

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        Dict mapping each color name in color_profile to the percentage
        (0-100) of the image's pixels assigned to it.

    Raises:
        ValueError: If the image cannot be read from image_path.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when the file is missing or unreadable
        raise ValueError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Signed integers: squaring uint8 channel values would wrap around at 256
    pixels = image.reshape(-1, 3).astype(np.int64)

    color_counts = {name: 0 for name in color_profile.keys()}

    # Vectorized white pixel detection
    white_diffs = np.sum((pixels - color_profile['White']) ** 2, axis=1)
    is_white = white_diffs <= white_tolerance_squared
    color_counts['White'] = int(np.sum(is_white))

    # Filter non-white pixels
    non_white_pixels = pixels[~is_white]
    other_names = [name for name in color_profile if name != 'White']

    if non_white_pixels.size > 0:
        # True squared distance |p - c|^2 to every remaining profile color.
        # The earlier expression (p.c - 2|c|^2) is not a distance and picked
        # the wrong color, e.g. Violet for a black pixel.
        distances = np.column_stack([
            np.sum((non_white_pixels - color_profile[name]) ** 2, axis=1)
            for name in other_names
        ])

        # Identify the closest color for each pixel and tally per color
        closest_colors = np.argmin(distances, axis=1)
        for name, count in zip(other_names, np.bincount(closest_colors, minlength=len(other_names))):
            color_counts[name] = int(count)

    total_pixels = len(pixels)
    color_proportions = {color: (count / total_pixels) * 100 for color, count in color_counts.items()}

    return color_proportions

def display_color_proportions_with_thumbnail(color_proportions, image_path, title):
    """Plot the color shares as a bar chart (left) next to the thumbnail (right).

    color_proportions maps color names (keys of color_profile) to percentages,
    image_path is the thumbnail to show, title is the chart title.
    """
    fig, axs = plt.subplots(1, 2, figsize=(12, 5))
    chart_ax, thumb_ax = axs[0], axs[1]

    # Draw the bars individually: only the White bar gets a black edge
    for name, share in color_proportions.items():
        outline = 'black' if name == 'White' else None
        fill = color_profile[name] / 255
        chart_ax.bar(name, share, color=fill, edgecolor=outline)

    chart_ax.set_title(title)
    chart_ax.set_xlabel('Colors')
    chart_ax.set_ylabel('Proportion (%)')

    # OpenCV loads BGR; matplotlib expects RGB
    thumb_ax.imshow(cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB))
    thumb_ax.axis('off')

    plt.tight_layout()
    plt.show()

# Folder that holds the thumbnails to analyse
image_folder = r'C:\Users\yesha\Desktop\College\4th year\1st Semester\THS-ST1\thumbnail_extract\thumbnails'

# Supported image files, capped at the first 100 entries returned by os.listdir
image_files = []
for entry in os.listdir(image_folder):
    if entry.endswith(('.png', '.jpg', '.jpeg')):
        image_files.append(entry)
image_files = image_files[:100]

output_csv = 'optimized_image_colors_proportions.csv'

# Analyse every image, writing one CSV row and showing one chart per image
with open(output_csv, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Name', *color_profile.keys()])

    for image_file in image_files:
        image_path = os.path.join(image_folder, image_file)
        color_proportions = calculate_color_proportions(image_path)

        # Rounded percentages in color_profile order, matching the header
        rounded = [round(color_proportions[color], 2) for color in color_profile.keys()]
        writer.writerow([image_file, *rounded])

        display_color_proportions_with_thumbnail(color_proportions, image_path, f"Color Proportions for {image_file}")

print(f"Data has been saved to '{output_csv}'.")


## Downscaling

In [None]:
import cv2
import numpy as np
import os
import csv
import matplotlib.pyplot as plt

# Reference RGB value (0-255 per channel) for each named color bucket.
# Violet ([238, 130, 238]) is deliberately left out of this run.
color_profile = dict(
    Yellow=np.array([255, 255, 0]),
    Orange=np.array([255, 165, 0]),
    Red=np.array([255, 0, 0]),
    Blue=np.array([0, 0, 255]),
    Green=np.array([0, 255, 0]),
    Black=np.array([0, 0, 0]),
    White=np.array([255, 255, 255]),
)

# Parallel name list and (n_colors, 3) array used by the vectorized distance computation
color_names = [name for name in color_profile]
color_array = np.stack([color_profile[name] for name in color_names])

# Function to calculate the proportion of each predefined color in an image
def calculate_color_proportions(image_path):
    """Return the share of pixels closest to each profile color.

    The image is shrunk to 100x100 first; each of the resulting pixels is then
    assigned to the row of color_array with the smallest Euclidean distance.
    Returns a dict mapping every name in color_names to a percentage.
    """
    rgb = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)

    # Downscale so the distance matrix stays small
    small = cv2.resize(rgb, (100, 100), interpolation=cv2.INTER_AREA)
    flat = small.reshape(-1, 3)

    # Broadcast to (n_pixels, n_colors, 3) and reduce to one distance per pair
    offsets = flat[:, None] - color_array[None, :]
    nearest = np.argmin(np.linalg.norm(offsets, axis=2), axis=1)

    # Tally how often each color index won
    found, tallies = np.unique(nearest, return_counts=True)
    tally_by_index = dict(zip(found, tallies))

    # Colors that never won are reported as 0 percent
    pixel_total = len(flat)
    shares = {}
    for position, label in enumerate(color_names):
        shares[label] = (tally_by_index.get(position, 0) / pixel_total) * 100

    return shares

# Function to display the color proportions as a bar chart with the thumbnail
def display_color_proportions_with_thumbnail(color_proportions, image_path, title):
    """Plot the color shares as a bar chart (left) next to the thumbnail (right).

    color_proportions maps color names to percentages. The bar fill colors are
    taken from color_profile in its own order, so color_proportions is expected
    to list the colors in that same order.
    """
    names = list(color_proportions.keys())
    shares = list(color_proportions.values())

    fig, axs = plt.subplots(1, 2, figsize=(12, 5))
    chart_ax, thumb_ax = axs[0], axs[1]

    # All bars in one call; only the White bar gets a visible (black) edge
    bar_fill = np.array(list(color_profile.values())) / 255
    bar_edges = []
    for name in names:
        bar_edges.append('black' if name == 'White' else 'none')
    chart_ax.bar(names, shares, color=bar_fill, edgecolor=bar_edges)

    chart_ax.set_title(title)
    chart_ax.set_xlabel('Colors')
    chart_ax.set_ylabel('Proportion (%)')

    # Thumbnail: OpenCV loads BGR, matplotlib expects RGB
    thumbnail = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
    thumb_ax.imshow(thumbnail)
    thumb_ax.axis('off')

    plt.tight_layout()
    plt.show()

# Folder that holds the thumbnails to analyse
image_folder = r'C:\Users\yesha\Desktop\College\4th year\1st Semester\THS-ST1\thumbnail_extract\thumbnails'  # Replace with the path to your images

# Supported image files, capped at the first 1571 entries returned by os.listdir
image_files = list(filter(lambda name: name.endswith(('.png', '.jpg', '.jpeg')), os.listdir(image_folder)))[:1571]

# One CSV row and one chart per image
with open('downscaling_1571_no_violet.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Name', *color_profile.keys()])

    for image_file in image_files:
        image_path = os.path.join(image_folder, image_file)
        color_proportions = calculate_color_proportions(image_path)

        # Rounded percentages in color_profile order, matching the header
        csv_row = [image_file]
        for color in color_profile.keys():
            csv_row.append(round(color_proportions[color], 2))
        writer.writerow(csv_row)

        display_color_proportions_with_thumbnail(color_proportions, image_path, f"Color Proportions for {image_file}")

print("Processing complete. Results saved to 'downscaling_1571_no_violet.csv'.")


## putting the graph and thumbnail in a file 

In [None]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
from openpyxl import Workbook
from openpyxl.drawing.image import Image as ExcelImage
from io import BytesIO
from PIL import Image as PILImage

# Predefined color profile
# Reference RGB value (0-255 per channel) for each named color bucket
color_profile = {
    name: np.array(rgb)
    for name, rgb in [
        ('Yellow', [255, 255, 0]),
        ('Orange', [255, 165, 0]),
        ('Red', [255, 0, 0]),
        ('Violet', [238, 130, 238]),
        ('Blue', [0, 0, 255]),
        ('Green', [0, 255, 0]),
        ('Black', [0, 0, 0]),
        ('White', [255, 255, 255]),
    ]
}

# Parallel name list and (n_colors, 3) array used by the vectorized distance computation
color_names = [name for name in color_profile]
color_array = np.stack([color_profile[name] for name in color_names])

# Function to calculate the proportion of each predefined color in an image
def calculate_color_proportions(image_path):
    """Return the share of pixels closest to each profile color.

    The image is resized to 100x100; each resulting pixel is assigned to the
    row of color_array with the smallest Euclidean distance in RGB space.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        Dict mapping every name in color_names to the percentage (0-100) of
        the resized image's pixels assigned to it.

    Raises:
        ValueError: If the image cannot be read from image_path.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when the file is missing or unreadable
        raise ValueError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (100, 100), interpolation=cv2.INTER_AREA)
    pixels = image.reshape(-1, 3)

    # Compute distances and find closest colors
    distances = np.linalg.norm(pixels[:, None] - color_array[None, :], axis=2)
    closest_colors = np.argmin(distances, axis=1)

    # Count occurrences
    unique, counts = np.unique(closest_colors, return_counts=True)
    total_pixels = len(pixels)
    color_counts = dict(zip(unique, counts))

    # Calculate proportions (colors that never won are reported as 0)
    color_proportions = {color_names[i]: (color_counts.get(i, 0) / total_pixels) * 100 for i in range(len(color_names))}
    return color_proportions

# Function to create a bar chart and return it as an image
def create_color_bar_chart(color_proportions):
    """Render the color shares as a bar chart and return it as a PIL image.

    color_proportions maps color names to percentages. The bar fill colors are
    taken from color_profile in its own order. The figure is written to an
    in-memory PNG and closed before the image is returned.
    """
    names = list(color_proportions.keys())
    shares = list(color_proportions.values())
    bar_fill = np.array(list(color_profile.values())) / 255

    fig, ax = plt.subplots(figsize=(6, 4))
    ax.bar(names, shares, color=bar_fill)
    ax.set_title('Color Proportions')
    ax.set_xlabel('Colors')
    ax.set_ylabel('Proportion (%)')

    # Serialize the current figure into memory, then rewind for reading
    png_bytes = BytesIO()
    plt.savefig(png_bytes, format='png')
    png_bytes.seek(0)
    plt.close(fig)

    return PILImage.open(png_bytes)

import tempfile  # intermediate PNGs go to a temp folder instead of the working directory

# Set the path to the folder containing the images
image_folder = r'C:\Users\yesha\Desktop\College\4th year\1st Semester\THS-ST1\thumbnail_extract\thumbnails'  # Replace with the path to your images
image_files = [f for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg', '.jpeg'))][:10]  # Limit for testing

# Create an Excel workbook
workbook = Workbook()
sheet = workbook.active
sheet.title = "Image Analysis"
sheet.append(['Image Name', 'Color Proportions', 'Thumbnail', 'Graph'])

# Layout constants used to size rows/columns around the embedded images
PIXELS_TO_POINTS = 0.75      # row heights are given in points (96 px == 72 pt)
PIXELS_PER_WIDTH_UNIT = 7    # approximate pixel width of one column-width unit
MAX_ROW_HEIGHT = 409         # largest row height Excel accepts, in points
THUMBNAIL_SIZE = (150, 150)  # thumbnail size inside the sheet, in pixels

widest_graph = 0

with tempfile.TemporaryDirectory() as temp_dir:
    # Process each image
    for row, image_file in enumerate(image_files, start=2):
        image_path = os.path.join(image_folder, image_file)

        # Calculate color proportions
        color_proportions = calculate_color_proportions(image_path)

        # Create the color bar chart
        chart_image = create_color_bar_chart(color_proportions)

        # Insert the thumbnail (the source file handle is closed right after resizing)
        thumbnail_path = os.path.join(temp_dir, f"temp_thumbnail_{row}.png")
        with PILImage.open(image_path) as source_image:
            source_image.resize(THUMBNAIL_SIZE).save(thumbnail_path)

        excel_thumbnail = ExcelImage(thumbnail_path)
        excel_thumbnail.anchor = f'C{row}'
        sheet.add_image(excel_thumbnail)

        # Insert the graph
        graph_path = os.path.join(temp_dir, f"temp_graph_{row}.png")
        chart_image.save(graph_path)
        excel_graph = ExcelImage(graph_path)
        excel_graph.anchor = f'D{row}'
        sheet.add_image(excel_graph)

        # Images float above the cells: make the row tall enough for the
        # taller of the two so consecutive rows do not overlap
        tallest = max(excel_thumbnail.height, excel_graph.height)
        sheet.row_dimensions[row].height = min(tallest * PIXELS_TO_POINTS, MAX_ROW_HEIGHT)
        widest_graph = max(widest_graph, excel_graph.width)

        # Add image name and proportions to Excel
        proportions_text = ", ".join(f"{color}: {round(percentage, 2)}%" for color, percentage in color_proportions.items())
        sheet.cell(row=row, column=1, value=image_file)
        sheet.cell(row=row, column=2, value=proportions_text)

    # Widen the image columns so thumbnail and graph do not overlap each other
    sheet.column_dimensions['C'].width = THUMBNAIL_SIZE[0] / PIXELS_PER_WIDTH_UNIT + 1
    if widest_graph:
        sheet.column_dimensions['D'].width = widest_graph / PIXELS_PER_WIDTH_UNIT + 1

    # Save while the temp folder still exists: the image files are read
    # again when the workbook is written
    workbook.save('image_analysis.xlsx')

print("Processing complete. Results saved to 'image_analysis.xlsx'.")
