In [18]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.io import imread
from skimage.color import rgb2gray
from skimage.filters import sobel

def assess_image_quality(image_path):
    """Compute four simple quality statistics for the image at ``image_path``.

    The file is read with ``imread``. A 2-D (single-channel) array is
    replicated to three channels and a fourth channel is dropped before any
    metric is computed; three-channel images are used unchanged.

    Parameters
    ----------
    image_path : str
        Path of the image file to read.

    Returns
    -------
    tuple
        ``(sharpness, noise, color_balance, background_consistency)`` where

        * ``sharpness`` is the variance of the Laplacian response of the
          colour array (all channels pooled),
        * ``noise`` is the standard deviation of the greyscale image,
        * ``color_balance`` is an array with the standard deviation of each
          colour channel, and
        * ``background_consistency`` is the mean of the Sobel edge response
          of the greyscale image.
    """
    image = imread(image_path)

    # rgb2gray expects three channels in the last axis, and the per-channel
    # statistics below need a channel axis at all. Normalise the two common
    # exceptions up front so one odd file does not abort a whole dataset run.
    if image.ndim == 2:
        # Greyscale file: repeat the single plane for R, G and B.
        image = np.stack([image] * 3, axis=-1)
    elif image.ndim == 3 and image.shape[-1] == 4:
        # Four channels (presumably RGBA): keep the first three.
        # Copy to a contiguous array because slicing returns a strided view.
        image = np.ascontiguousarray(image[..., :3])

    gray_image = rgb2gray(image)

    # Sharpness: Laplacian variance.
    # NOTE(review): the Laplacian is taken on the colour array, not on
    # gray_image; left as-is so values stay comparable with earlier runs.
    sharpness = cv2.Laplacian(image, cv2.CV_64F).var()

    # Noise: standard deviation of the greyscale image
    noise = np.std(gray_image)

    # Colour balance: standard deviation of each colour channel
    color_balance = np.std(image, axis=(0, 1))

    # Background consistency: mean Sobel edge response
    edge_sobel = sobel(gray_image)
    background_consistency = np.mean(edge_sobel)

    return sharpness, noise, color_balance, background_consistency

# Column order of the results table
columns = ['Class_Name', 'Image_Path', 'Sharpness', 'Noise', 'Color_Balance', 'Background_Consistency']

dataset_path = 'C:/Users/Jarushen/Desktop/Masters Thesis/Images/Fynbos_Dataset_A'
image_extensions = ['.png', '.jpg', '.jpeg', '.bmp', '.tiff', '.jfif']

# Collect one record per image and build the DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0, and calling it inside a loop
# copied the whole frame on every iteration.
records = []
for class_folder in os.listdir(dataset_path):
    folder_path = os.path.join(dataset_path, class_folder)
    if not os.path.isdir(folder_path):  # only sub-directories are treated as classes
        continue

    for image_name in os.listdir(folder_path):
        # Case-insensitive extension filter; str.endswith accepts a tuple of suffixes
        if not image_name.lower().endswith(tuple(image_extensions)):
            continue

        image_path = os.path.join(folder_path, image_name)
        sharpness, noise, color_balance, background_consistency = assess_image_quality(image_path)

        records.append({
            'Class_Name': class_folder,
            'Image_Path': image_path,
            'Sharpness': sharpness,
            'Noise': noise,
            'Color_Balance': color_balance,
            'Background_Consistency': background_consistency
        })

# Passing `columns` fixes the column order and yields an empty, correctly
# labelled frame when no image was found.
results_df = pd.DataFrame(records, columns=columns)

# Optionally, save the DataFrame to a CSV file
results_df.to_csv(r"C:\Users\Jarushen\Desktop\Masters Thesis\Report_Final\images\Results\Data_Quality_Assessment\image_quality_assessment_results.csv", index=False)

# Print the DataFrame
print(results_df)



                 Class_Name  \
0     Agathosma serpyllacea   
1     Agathosma serpyllacea   
2     Agathosma serpyllacea   
3     Agathosma serpyllacea   
4     Agathosma serpyllacea   
...                     ...   
1191     Strelitzia reginae   
1192     Strelitzia reginae   
1193     Strelitzia reginae   
1194     Strelitzia reginae   
1195     Strelitzia reginae   

                                             Image_Path     Sharpness  \
0     C:/Users/Jarushen/Desktop/Masters Thesis/Image...     24.694065   
1     C:/Users/Jarushen/Desktop/Masters Thesis/Image...     25.501727   
2     C:/Users/Jarushen/Desktop/Masters Thesis/Image...     17.606731   
3     C:/Users/Jarushen/Desktop/Masters Thesis/Image...     12.465136   
4     C:/Users/Jarushen/Desktop/Masters Thesis/Image...     42.970167   
...                                                 ...           ...   
1191  C:/Users/Jarushen/Desktop/Masters Thesis/Image...  14824.116678   
1192  C:/Users/Jarushen/Desktop/Masters The

In [3]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.io import imread
from skimage.util import img_as_float
from skimage.color import rgb2gray
from skimage.filters import sobel

# Scoring thresholds, each a (low, high) pair. score_metric returns 0 below
# `low`, 1 from `low` up to (but excluding) `high`, and 2 from `high` upwards.
# Values marked "quartiles" were updated from the statistical analysis.
sharpness_threshold = 300, 600  # original values kept because the range was too wide
noise_threshold = 0.17, 0.25  # quartiles
color_balance_threshold = dict(  # quartiles, one pair per colour channel
    R=(46.89, 67.80),
    G=(44.10, 64.52),
    B=(39.90, 65.11),
)
background_consistency_threshold = 0.017, 0.101  # quartiles
resolution_threshold = 1_000_000, 2_000_000  # original values kept

def score_metric(value, low, high):
    """Map ``value`` onto a three-level score using two cut-offs.

    Returns 0 when ``value < low``, 1 when ``value`` is not below ``low`` but
    is below ``high``, and 2 otherwise.
    """
    if value < low:
        return 0
    return 1 if value < high else 2

def assess_image_quality(image_path):
    """Compute four simple quality statistics for the image at ``image_path``.

    The file is read with ``imread``. A 2-D (single-channel) array is
    replicated to three channels and a fourth channel is dropped before any
    metric is computed; three-channel images are used unchanged.

    Parameters
    ----------
    image_path : str
        Path of the image file to read.

    Returns
    -------
    tuple
        ``(sharpness, noise, color_balance, background_consistency)`` where

        * ``sharpness`` is the variance of the Laplacian response of the
          colour array (all channels pooled),
        * ``noise`` is the standard deviation of the greyscale image,
        * ``color_balance`` is an array with the standard deviation of each
          colour channel, and
        * ``background_consistency`` is the mean of the Sobel edge response
          of the greyscale image.
    """
    image = imread(image_path)

    # rgb2gray expects three channels in the last axis, and the per-channel
    # statistics below need a channel axis at all. Normalise the two common
    # exceptions up front so one odd file does not abort a whole dataset run.
    if image.ndim == 2:
        # Greyscale file: repeat the single plane for R, G and B.
        image = np.stack([image] * 3, axis=-1)
    elif image.ndim == 3 and image.shape[-1] == 4:
        # Four channels (presumably RGBA): keep the first three.
        # Copy to a contiguous array because slicing returns a strided view.
        image = np.ascontiguousarray(image[..., :3])

    gray_image = rgb2gray(image)

    # Sharpness: Laplacian variance.
    # NOTE(review): the Laplacian is taken on the colour array, not on
    # gray_image; left as-is so values stay comparable with the thresholds.
    sharpness = cv2.Laplacian(image, cv2.CV_64F).var()

    # Noise: standard deviation of luminance
    noise = np.std(gray_image)

    # Colour balance: standard deviation of each colour channel
    color_balance = np.std(image, axis=(0, 1))

    # Background consistency (simple approach): mean Sobel edge response
    edge_sobel = sobel(gray_image)
    background_consistency = np.mean(edge_sobel)

    return sharpness, noise, color_balance, background_consistency

# Column order of the results table
columns = ['Class_Name', 'Image_Path', 'Sharpness', 'Noise', 'Color_Balance', 'Background_Consistency',
           'Sharpness_Score', 'Noise_Score', 'Color_Balance_Score', 'Background_Consistency_Score',
           'Resolution_Score', 'Total_Score']

dataset_path = 'C:/Users/Jarushen/Desktop/Masters Thesis/Images/Fynbos_Dataset_A'
image_extensions = ['.png', '.jpg', '.jpeg', '.bmp', '.tiff', '.tif']

# Collect one record per image and build the DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0, and calling it inside a loop
# copied the whole frame on every iteration.
records = []
for class_folder in os.listdir(dataset_path):
    folder_path = os.path.join(dataset_path, class_folder)

    # Only sub-directories are treated as classes
    if not os.path.isdir(folder_path):
        continue

    for image_name in os.listdir(folder_path):
        # Case-insensitive extension filter; str.endswith accepts a tuple of suffixes
        if not image_name.lower().endswith(tuple(image_extensions)):
            continue

        image_path = os.path.join(folder_path, image_name)
        sharpness, noise, color_balance, background_consistency = assess_image_quality(image_path)

        # "Resolution" is the total element count of the decoded array.
        # NOTE(review): .size includes the channel axis, so an RGB image
        # counts height * width * 3, not height * width; confirm that the
        # resolution thresholds were chosen with that in mind.
        resolution = imread(image_path).size
        resolution_score = score_metric(resolution, *resolution_threshold)

        # One score per colour channel, in R, G, B order
        color_balance_scores = [score_metric(color_balance[i], *color_balance_threshold[chan])
                                for i, chan in enumerate(['R', 'G', 'B'])]
        color_balance_mean = np.mean(color_balance_scores)

        # Score each metric once and reuse the values for the total
        sharpness_score = score_metric(sharpness, *sharpness_threshold)
        noise_score = score_metric(noise, *noise_threshold)
        background_consistency_score = score_metric(background_consistency,
                                                    *background_consistency_threshold)

        records.append({
            'Class_Name': class_folder,
            'Image_Path': image_path,
            'Sharpness': sharpness,
            'Noise': noise,
            'Color_Balance': color_balance,
            'Background_Consistency': background_consistency,
            'Sharpness_Score': sharpness_score,
            'Noise_Score': noise_score,
            'Color_Balance_Score': round(color_balance_mean),
            'Background_Consistency_Score': background_consistency_score,
            'Resolution_Score': resolution_score,
            # NOTE(review): the total adds the *unrounded* colour-balance mean,
            # so it can differ from the sum of the five score columns above.
            'Total_Score': sum([sharpness_score,
                                noise_score,
                                color_balance_mean,
                                background_consistency_score,
                                resolution_score])
        })

# Passing `columns` fixes the column order and yields an empty, correctly
# labelled frame when no image was found.
results_df = pd.DataFrame(records, columns=columns)

# Optionally, save the DataFrame to a CSV file
results_df.to_csv("C:/Users/Jarushen/Desktop/Masters Thesis/Report_Final/images/Results/Data_Quality_Assessment/image_quality_assessment_results.csv", index=False)

# Print the DataFrame
print(results_df)



                 Class_Name  \
0     Agathosma serpyllacea   
1     Agathosma serpyllacea   
2     Agathosma serpyllacea   
3     Agathosma serpyllacea   
4     Agathosma serpyllacea   
...                     ...   
1191     Strelitzia reginae   
1192     Strelitzia reginae   
1193     Strelitzia reginae   
1194     Strelitzia reginae   
1195     Strelitzia reginae   

                                             Image_Path     Sharpness  \
0     C:/Users/Jarushen/Desktop/Masters Thesis/Image...     24.694065   
1     C:/Users/Jarushen/Desktop/Masters Thesis/Image...     25.501727   
2     C:/Users/Jarushen/Desktop/Masters Thesis/Image...     17.606731   
3     C:/Users/Jarushen/Desktop/Masters Thesis/Image...     12.465136   
4     C:/Users/Jarushen/Desktop/Masters Thesis/Image...     42.970167   
...                                                 ...           ...   
1191  C:/Users/Jarushen/Desktop/Masters Thesis/Image...  14824.116678   
1192  C:/Users/Jarushen/Desktop/Masters The

In [4]:


# Mean Total_Score per class, returned as a two-column table
# (Class_Name, Average_Score) with a fresh integer index.
class_average_scores = (
    results_df
    .groupby('Class_Name')['Total_Score']
    .mean()
    .rename('Average_Score')
    .reset_index()
)

# Show the per-class averages
print(class_average_scores)

                     Class_Name  Average_Score
0         Agathosma serpyllacea       4.411765
1              Aloe arborescens       5.431373
2       Arctotis stoechadifolia       4.984848
3              Aristea capitata       4.933333
4        Baloskion tetraphyllum       3.712121
5         Carpobrotus chilensis       4.923077
6            Carpobrotus edulis       4.666667
7          Cotyledon orbiculata       5.049645
8              Curio talinoides       5.243902
9             Erica arborescens       4.712644
10                Erica cinerea       4.657407
11               Erica discolor       4.322917
12               Erica duthieae       4.755556
13              Erica perspicua       5.266667
14               Gazania rigens       5.333333
15            Grevillea banksii       4.783333
16        Helichrysum petiolare       5.160714
17       Leucadendron argenteum       4.923077
18       Leucadendron laureolum       4.777778
19        Leucadendron salignum       4.508772
20     Leucos

In [21]:
import os
from collections import Counter

def get_file_extension(file_name):
    """Return the extension of ``file_name`` as given by ``os.path.splitext``.

    The result includes the leading dot and is the empty string when
    ``splitext`` finds no extension.
    """
    _stem, extension = os.path.splitext(file_name)
    return extension

def count_file_types(directory):
    """Count the files under ``directory`` by lower-cased extension.

    Walks the tree with ``os.walk`` and returns a ``Counter`` that maps each
    extension reported by ``get_file_extension`` (lower-cased) to the number
    of files carrying it.
    """
    return Counter(
        get_file_extension(entry).lower()
        for _root, _dirs, entries in os.walk(directory)
        for entry in entries
    )

dataset_path = 'C:/Users/Jarushen/Desktop/Masters Thesis/Images/Fynbos_Dataset_A'  # Replace with your dataset path

# Tally extensions across the whole dataset tree, then list one line per type
file_type_distribution = count_file_types(dataset_path)

print("File Type Distribution in the Dataset:")
for file_type in file_type_distribution:
    count = file_type_distribution[file_type]
    print(f"{file_type}: {count}")


File Type Distribution in the Dataset:
.ini: 36
.jpg: 1196
