<a href="https://colab.research.google.com/github/runningman145/Food-Quality-Assesment/blob/main/Fruit_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import matplotlib.pyplot as plt
from collections import Counter

def analyze_class_distribution(dataset_path):
    """Count the files in each class directory and plot the distribution.

    Every directory found below ``dataset_path`` (at any depth) is treated as
    a class named after the directory itself. Only regular files are counted,
    and directories that share a name (for example ``train/apple`` and
    ``test/apple``) have their counts added together. A directory that holds
    nothing but sub-directories is considered a grouping folder and is left
    out of the result.

    The bar chart is written to ``class_distribution.png`` in the current
    working directory.

    Args:
        dataset_path: Root directory of the dataset.

    Returns:
        A ``collections.Counter`` mapping class name to number of files.
    """
    class_counts = Counter()

    for root, subdirs, _files in os.walk(dataset_path):
        for class_name in subdirs:
            class_path = os.path.join(root, class_name)
            with os.scandir(class_path) as entries:
                is_file_flags = [entry.is_file() for entry in entries]
            file_count = sum(is_file_flags)

            # Entries exist but none is a file: this is a container such as
            # "train/", not a class, so it must not show up in the chart.
            if is_file_flags and file_count == 0:
                continue

            # Accumulate instead of assigning so same-named directories in
            # different parents do not overwrite one another.
            class_counts[class_name] += file_count

    classes = list(class_counts.keys())
    counts = list(class_counts.values())

    plt.figure(figsize=(12, 6))
    plt.bar(classes, counts)
    plt.title('Distribution of Images Across Fruit Categories')
    plt.xlabel('Fruit Category')
    plt.ylabel('Number of Images')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.savefig('class_distribution.png')
    plt.close()

    return class_counts

# Usage: replace the placeholder path with the dataset's root directory.
# The call also saves the bar chart as 'class_distribution.png' in the
# current working directory; the per-class counts are printed below.
dataset_path = 'path/to/your/dataset'
class_distribution = analyze_class_distribution(dataset_path)
print(class_distribution)

In [None]:
import cv2
import os
import numpy as np

def apply_noise_reduction(image_path, output_path, method='gaussian', kernel_size=5):
    """Blur one image to reduce noise and write the result to disk.

    Args:
        image_path: Path of the image to read.
        output_path: Path the denoised image is written to.
        method: ``'gaussian'`` for ``cv2.GaussianBlur`` or ``'median'`` for
            ``cv2.medianBlur``.
        kernel_size: Side length of the square blur kernel; must be a
            positive odd integer.

    Raises:
        ValueError: If ``method`` is not recognised or ``kernel_size`` is not
            a positive odd number.
        OSError: If the image cannot be read or the result cannot be written.
    """
    # Validate the arguments before touching the disk so a bad call fails
    # fast and with a clear message.
    if method not in ('gaussian', 'median'):
        raise ValueError("Method must be 'gaussian' or 'median'")
    if kernel_size < 1 or kernel_size % 2 == 0:
        raise ValueError(
            f"kernel_size must be a positive odd integer, got {kernel_size!r}"
        )

    # cv2.imread signals failure by returning None instead of raising.
    img = cv2.imread(image_path)
    if img is None:
        raise OSError(f"Could not read image: {image_path}")

    if method == 'gaussian':
        denoised = cv2.GaussianBlur(img, (kernel_size, kernel_size), 0)
    else:
        denoised = cv2.medianBlur(img, kernel_size)

    # cv2.imwrite reports failure through its boolean return value.
    if not cv2.imwrite(output_path, denoised):
        raise OSError(f"Could not write image: {output_path}")

def process_dataset(input_dir, output_dir, method='gaussian', kernel_size=5):
    """Denoise every image under ``input_dir`` into a mirrored ``output_dir`` tree.

    Files whose name ends in ``.png``, ``.jpg`` or ``.jpeg`` (case-insensitive)
    are passed to ``apply_noise_reduction``; all other files are ignored. Each
    result is written to the same relative location below ``output_dir``, and
    missing destination directories are created on demand.

    Args:
        input_dir: Root directory to search recursively for images.
        output_dir: Root directory that receives the denoised images.
        method: Forwarded to ``apply_noise_reduction``.
        kernel_size: Forwarded to ``apply_noise_reduction``.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')

    for current_dir, _subdirs, filenames in os.walk(input_dir):
        for filename in filenames:
            if not filename.lower().endswith(image_suffixes):
                continue

            source = os.path.join(current_dir, filename)
            # Keep the path relative to the input root so the output tree
            # mirrors the input tree.
            destination = os.path.join(output_dir, os.path.relpath(source, input_dir))

            os.makedirs(os.path.dirname(destination), exist_ok=True)
            apply_noise_reduction(source, destination, method, kernel_size)

# Usage: replace the placeholder paths before running. Denoised images are
# written under output_dir using the same relative layout as input_dir.
input_dir = 'path/to/your/dataset'
output_dir = 'path/to/denoised/dataset'
process_dataset(input_dir, output_dir, method='gaussian', kernel_size=5)

In [None]:
import os
import shutil
import random
from collections import Counter

def balance_dataset(input_dir, output_dir, target_count=None):
    """Copy a dataset and oversample its smaller classes up to a target size.

    Each immediate sub-directory of ``input_dir`` is treated as one class.
    All regular files of a class are copied to ``output_dir/<class>``; classes
    with fewer files than ``target_count`` are then padded with randomly
    chosen duplicates named ``<stem>_copy<i><ext>``. Classes that already have
    ``target_count`` files or more are copied unchanged (no undersampling).

    Sub-directories inside a class directory are ignored, and a class with no
    files yields an empty output directory because there is nothing to
    duplicate. If ``input_dir`` contains no class directories the function
    returns without creating anything.

    Args:
        input_dir: Directory whose sub-directories are the classes.
        output_dir: Directory that receives the balanced copy.
        target_count: Desired number of files per class. Defaults to the size
            of the largest class.
    """
    # List every class once, keeping only regular files: a nested directory
    # is not an image and shutil.copy2 cannot copy it.
    class_files = {}
    for class_name in os.listdir(input_dir):
        class_path = os.path.join(input_dir, class_name)
        if os.path.isdir(class_path):
            class_files[class_name] = sorted(
                name for name in os.listdir(class_path)
                if os.path.isfile(os.path.join(class_path, name))
            )

    # Nothing to balance; also avoids calling max() on an empty sequence.
    if not class_files:
        return

    if target_count is None:
        target_count = max(len(files) for files in class_files.values())

    for class_name, files in class_files.items():
        input_class_path = os.path.join(input_dir, class_name)
        output_class_path = os.path.join(output_dir, class_name)
        os.makedirs(output_class_path, exist_ok=True)

        # Copy all original images
        for file_name in files:
            shutil.copy2(os.path.join(input_class_path, file_name), output_class_path)

        # An empty class has no file to duplicate (random.choice would fail).
        if not files:
            continue

        # Oversample if necessary; the range is empty when the class already
        # meets or exceeds the target.
        for i in range(target_count - len(files)):
            file_name = random.choice(files)
            stem, ext = os.path.splitext(file_name)
            shutil.copy2(os.path.join(input_class_path, file_name),
                         os.path.join(output_class_path, f"{stem}_copy{i}{ext}"))

# Usage: replace the placeholder paths before running. With no target_count
# given, every class is padded up to the size of the largest class.
input_dir = 'path/to/your/dataset'
output_dir = 'path/to/balanced/dataset'
balance_dataset(input_dir, output_dir)