# Healthy and Bleached Corals Image Classification

## Install dependencies

In [1]:
!pip install numpy pillow scikit-learn seaborn



## Imports

In [2]:
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier


## Dataset Overview

In [5]:
bleached_path = "./data/bleached_corals"
healthy_path = "./data/healthy_corals"

def get_image_paths(folder_path):
    return [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith('.jpg')]

# Get all image paths
bleached_image_paths = get_image_paths(bleached_path)
print(f"Bleached Corals - Total Images: {len(bleached_image_paths)}")
healthy_image_paths = get_image_paths(healthy_path)
print(f"Healthy Corals - Total Images: {len(healthy_image_paths)}")

def analyze_images(image_paths):
    dimensions = []
    for img_path in image_paths:
        with Image.open(img_path) as img:
            dimensions.append(img.size)  # (width, height)
    
    dimensions = np.array(dimensions)
    
    avg_width = np.mean(dimensions[:, 0])
    min_width = np.min(dimensions[:, 0])
    max_width = np.max(dimensions[:, 0])
    avg_height = np.mean(dimensions[:, 1])
    min_height = np.min(dimensions[:, 1])
    max_height = np.max(dimensions[:, 1])
    
    return avg_width, min_width, max_width, avg_height, min_height, max_height, dimensions

# Analyze bleached coral images
bleached_avg_width, bleached_min_width, bleached_max_width, bleached_avg_height, bleached_min_height, bleached_max_height, bleached_dimensions = analyze_images(bleached_image_paths)
print(f"Bleached Corals - Average Width: {bleached_avg_width}, Average Height: {bleached_avg_height}")
print(f"Bleached Corals - Minimum Width: {bleached_min_width}, Maximum Width: {bleached_max_width}")
print(f"Bleached Corals - Minimum Height: {bleached_min_height}, Maximum Height: {bleached_max_height}")

# Analyze healthy coral images
healthy_avg_width, healthy_min_width, healthy_max_width, healthy_avg_height, healthy_min_height, healthy_max_height, healthy_dimensions = analyze_images(healthy_image_paths)
print(f"Healthy Corals - Average Width: {healthy_avg_width}, Average Height: {healthy_avg_height}")
print(f"Healthy Corals - Minimum Width: {healthy_min_width}, Maximum Width: {healthy_max_width}")
print(f"Healthy Corals - Minimum Height: {healthy_min_height}, Maximum Height: {healthy_max_height}")

Bleached Corals - Total Images: 484
Healthy Corals - Total Images: 438
Bleached Corals - Average Width: 301.3347107438017, Average Height: 224.98760330578511
Bleached Corals - Minimum Width: 199, Maximum Width: 1024
Bleached Corals - Minimum Height: 134, Maximum Height: 1024
Healthy Corals - Average Width: 288.51826484018267, Average Height: 237.96118721461187
Healthy Corals - Minimum Width: 150, Maximum Width: 1024
Healthy Corals - Minimum Height: 169, Maximum Height: 768


## Load and Resize Images

In [6]:
def load_and_resize_images(image_paths, size=(64, 64)):
    images = []
    for img_path in image_paths:
        with Image.open(img_path) as img:
            img = img.resize(size)
            images.append(np.array(img).flatten())  # Flatten the image into a 1D array
    return np.array(images)

bleached_images = load_and_resize_images(bleached_image_paths)
healthy_images = load_and_resize_images(healthy_image_paths)

## Labeling the dataset

In [7]:
# Create labels for the images
bleached_labels = ['bleached'] * len(bleached_images)
healthy_labels = ['healthy'] * len(healthy_images)

# Combine the data and labels
data = np.vstack((bleached_images, healthy_images))
labels = np.array(bleached_labels + healthy_labels)

# Encode the labels to numerical values
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

## Split in training and testing data

In [8]:
X_train, X_test, y_train, y_test = train_test_split(data, labels_encoded, test_size=0.3, random_state=42)

## Train a classifier

In [9]:

# classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier = KNeighborsClassifier(n_neighbors=5)
# classifier = DecisionTreeClassifier(max_depth=20)
classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = classifier.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

Accuracy: 0.7328519855595668
Classification Report:
              precision    recall  f1-score   support

    bleached       0.72      0.85      0.78       154
     healthy       0.76      0.59      0.66       123

    accuracy                           0.73       277
   macro avg       0.74      0.72      0.72       277
weighted avg       0.74      0.73      0.73       277

