In [29]:
import cv2
import numpy as np
import pandas as pd
import os

# Image paths: one reference image per white-blood-cell class.
# The dataset root is machine-specific, so it is defined once and can be
# overridden with the WBC_TRAIN_DIR environment variable instead of editing
# five separate literals. With the default root on Windows the resulting
# strings are identical to the previously hard-coded paths.
TRAIN_DIR = os.environ.get(
    "WBC_TRAIN_DIR",
    r"E:\2. DIP\2. LAB\pythonProject\Projects\Assignment 2\wbc_data\Train",
)
WBC_CLASSES = ("Basophil", "Eosinophil", "Lymphocyte", "Monocyte", "Neutrophil")

# Maps class label -> "<TRAIN_DIR>/<Class>/<Class>_1.jpg" (insertion order
# follows WBC_CLASSES, which is the order rows appear in the feature table).
images = {
    cls: os.path.join(TRAIN_DIR, cls, f"{cls}_1.jpg")
    for cls in WBC_CLASSES
}

# Helper: Extract shape features from a contour
def shape_features(contour):
    """Compute five shape descriptors for one contour.

    Parameters
    ----------
    contour
        A contour as accepted by the ``cv2`` contour functions called below.

    Returns
    -------
    tuple
        ``(area, aspect_ratio, extent, solidity, circularity)`` where
        aspect_ratio is bounding-box width / height, extent is
        area / bounding-box area, solidity is area / convex-hull area and
        circularity is ``4 * pi * area / perimeter**2``. Solidity and
        circularity fall back to 0 when their denominator is zero.
    """
    region_area = cv2.contourArea(contour)
    outline_length = cv2.arcLength(contour, True)  # True: treat the curve as closed
    _, _, box_w, box_h = cv2.boundingRect(contour)

    width_over_height = float(box_w) / box_h
    box_fill = region_area / (box_w * box_h)

    convex_area = cv2.contourArea(cv2.convexHull(contour))
    if convex_area != 0:
        solidity = region_area / convex_area
    else:
        solidity = 0

    if outline_length != 0:
        roundness = 4 * np.pi * region_area / (outline_length ** 2)
    else:
        roundness = 0

    return region_area, width_over_height, box_fill, solidity, roundness

# Main feature table: one row of shape and colour features per reference image.
IMG_SIZE = (128, 128)  # (width, height) every image is resized to before analysis
data = []

for label, path in images.items():
    img = cv2.imread(path)
    # cv2.imread reports an unreadable/missing file by returning None instead
    # of raising; fail here with the offending path rather than letting
    # cv2.resize raise an opaque error on a None input.
    if img is None:
        raise FileNotFoundError(f"Could not load image at path: {path}")
    img = cv2.resize(img, IMG_SIZE)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # Segment nucleus using thresholding: Otsu threshold on the blurred
    # grayscale image, then invert so pixels at or below the threshold
    # (the darker ones) form the foreground mask.
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    _, binary = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    mask_nucleus = cv2.bitwise_not(binary)

    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; index -2 is the contour list on all.
    contours = cv2.findContours(mask_nucleus, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Default values (used when the mask contains no contour at all)
    nucleus_area = 0
    aspect_ratio = extent = solidity = circularity = lobes_estimate = 0

    if contours:
        # Shape features describe the largest contour only; the lobe estimate
        # is simply the number of external contours found in the mask.
        largest = max(contours, key=cv2.contourArea)
        nucleus_area, aspect_ratio, extent, solidity, circularity = shape_features(largest)
        lobes_estimate = len(contours)

    # Cytoplasm mask (inverse of the nucleus mask, i.e. everything else)
    mask_cytoplasm = cv2.bitwise_not(mask_nucleus)

    # Mean colors (HSV): whole image, nucleus mask, cytoplasm mask.
    # NOTE(review): hue is an angle, so an arithmetic mean can be misleading
    # for reddish hues that straddle the wrap-around — confirm this is
    # acceptable for the thresholds derived from these columns.
    mean_hsv_img = cv2.mean(hsv)[:3]
    mean_hsv_nucleus = cv2.mean(hsv, mask_nucleus)[:3]
    mean_hsv_cytoplasm = cv2.mean(hsv, mask_cytoplasm)[:3]

    # Area analysis: ratios are relative to the full resized image, derived
    # from IMG_SIZE so they stay correct if the resize target changes.
    total_area = IMG_SIZE[0] * IMG_SIZE[1]
    cytoplasm_area = total_area - nucleus_area
    nucleus_to_cell_ratio = nucleus_area / total_area
    cytoplasm_to_cell_ratio = cytoplasm_area / total_area

    # Save features
    data.append({
        "Class": label,
        "Nucleus_Area": nucleus_area,
        "Cytoplasm_Area": cytoplasm_area,
        "Nucleus/Cell_Ratio": nucleus_to_cell_ratio,
        "Cytoplasm/Cell_Ratio": cytoplasm_to_cell_ratio,
        "Aspect_Ratio": aspect_ratio,
        "Extent": extent,
        "Solidity": solidity,
        "Circularity": circularity,
        "Estimated_Lobes": lobes_estimate,
        "HSV_H_Total": mean_hsv_img[0],
        "HSV_S_Total": mean_hsv_img[1],
        "HSV_V_Total": mean_hsv_img[2],
        "HSV_H_Nucleus": mean_hsv_nucleus[0],
        "HSV_S_Nucleus": mean_hsv_nucleus[1],
        "HSV_V_Nucleus": mean_hsv_nucleus[2],
        "HSV_H_Cytoplasm": mean_hsv_cytoplasm[0],
        "HSV_S_Cytoplasm": mean_hsv_cytoplasm[1],
        "HSV_V_Cytoplasm": mean_hsv_cytoplasm[2],
    })

# Create DataFrame
df = pd.DataFrame(data)
print(df)

# Save CSV (read back by the later cell that builds the per-class mean vectors)
df.to_csv("wbc_features_updated.csv", index=False)


        Class  Nucleus_Area  Cytoplasm_Area  Nucleus/Cell_Ratio  \
0    Basophil        4879.0         11505.0            0.297791   
1  Eosinophil        6003.5         10380.5            0.366425   
2  Lymphocyte        5906.0         10478.0            0.360474   
3    Monocyte        3114.5         13269.5            0.190094   
4  Neutrophil        4910.0         11474.0            0.299683   

   Cytoplasm/Cell_Ratio  Aspect_Ratio    Extent  Solidity  Circularity  \
0              0.702209      1.000000  0.410656  0.612286     0.161750   
1              0.633575      0.921053  0.501546  0.693886     0.223163   
2              0.639526      0.867188  0.415681  0.534697     0.154788   
3              0.809906      0.614583  0.549876  0.818528     0.421179   
4              0.700317      1.270588  0.534858  0.654318     0.164512   

   Estimated_Lobes  HSV_H_Total  HSV_S_Total  HSV_V_Total  HSV_H_Nucleus  \
0                6    68.177917    65.512207   166.023682     128.838372   


In [2]:
# Columns worth inspecting when comparing the classes.
features_of_interest = [
    "Circularity",
    "Estimated_Lobes",
    "Nucleus/Cell_Ratio",
    "HSV_H_Nucleus",
    "HSV_S_Nucleus",
    "HSV_H_Cytoplasm",
    "HSV_S_Cytoplasm",
]

# Per-class summary (mean, min, max, std) of every feature column.
stats = (
    df
    .groupby("Class")
    .agg(["mean", "min", "max", "std"])
)

# Print only the selected features out of the full summary table.
print(stats.loc[:, features_of_interest])


           Circularity                         Estimated_Lobes              \
                  mean       min       max std            mean min max std   
Class                                                                        
Basophil      0.161750  0.161750  0.161750 NaN             6.0   6   6 NaN   
Eosinophil    0.223163  0.223163  0.223163 NaN             7.0   7   7 NaN   
Lymphocyte    0.154788  0.154788  0.154788 NaN            10.0  10  10 NaN   
Monocyte      0.421179  0.421179  0.421179 NaN            14.0  14  14 NaN   
Neutrophil    0.164512  0.164512  0.164512 NaN             6.0   6   6 NaN   

           Nucleus/Cell_Ratio            ... HSV_S_Nucleus      \
                         mean       min  ...           max std   
Class                                    ...                     
Basophil             0.297791  0.297791  ...     81.014887 NaN   
Eosinophil           0.366425  0.366425  ...    100.332847 NaN   
Lymphocyte           0.360474  0.360474  ... 

In [3]:
# Features to summarise for each class.
features_of_interest = [
    "Circularity",
    "Estimated_Lobes",
    "Nucleus/Cell_Ratio",
    "HSV_H_Nucleus",
    "HSV_S_Nucleus",
    "HSV_H_Cytoplasm",
    "HSV_S_Cytoplasm",
]

# Grouped statistics: mean / min / max of the selected columns per class.
stats = (
    df
    .groupby("Class")[features_of_interest]
    .agg(["mean", "min", "max"])
)
print(stats)


           Circularity                     Estimated_Lobes          \
                  mean       min       max            mean min max   
Class                                                                
Basophil      0.161750  0.161750  0.161750             6.0   6   6   
Eosinophil    0.223163  0.223163  0.223163             7.0   7   7   
Lymphocyte    0.154788  0.154788  0.154788            10.0  10  10   
Monocyte      0.421179  0.421179  0.421179            14.0  14  14   
Neutrophil    0.164512  0.164512  0.164512             6.0   6   6   

           Nucleus/Cell_Ratio                     HSV_H_Nucleus  ...  \
                         mean       min       max          mean  ...   
Class                                                            ...   
Basophil             0.297791  0.297791  0.297791    128.838372  ...   
Eosinophil           0.366425  0.366425  0.366425    166.857456  ...   
Lymphocyte           0.360474  0.360474  0.360474    153.743320  ...   
Monocyt

In [4]:
def classify_cell(features):
    """Label a cell from its feature dict using fixed threshold rules.

    ``features`` must contain the keys 'Circularity', 'Estimated_Lobes',
    'Nucleus/Cell_Ratio', 'HSV_H_Nucleus', 'HSV_H_Cytoplasm' and
    'HSV_S_Cytoplasm' (a missing key raises KeyError).

    Rules are tried in the order Monocyte, Lymphocyte, Eosinophil, Basophil,
    Neutrophil; the first rule that matches decides the label and "Unknown"
    is returned when none does.

    Note: a later cell of this notebook defines ``classify_cell`` again with
    different thresholds; whichever definition ran last is the one in effect.
    """
    roundness = features['Circularity']
    n_lobes = features['Estimated_Lobes']
    ratio = features['Nucleus/Cell_Ratio']
    nucleus_hue = features['HSV_H_Nucleus']
    cyto_hue = features['HSV_H_Cytoplasm']
    cyto_sat = features['HSV_S_Cytoplasm']

    # Each rule is a zero-argument callable so it is only evaluated if every
    # earlier rule has already failed.
    rules = (
        # Monocyte: high circularity, many contours, low nucleus ratio
        ("Monocyte",
         lambda: roundness > 0.35 and n_lobes >= 12 and ratio < 0.25),
        # Lymphocyte: many contours, high nucleus ratio, low circularity
        ("Lymphocyte",
         lambda: n_lobes >= 9 and ratio >= 0.35 and roundness < 0.2),
        # Eosinophil: low cytoplasm hue with high saturation, high nucleus hue
        ("Eosinophil",
         lambda: cyto_hue < 20 and cyto_sat > 60 and nucleus_hue > 160),
        # Basophil: nucleus hue 120-140, cytoplasm hue 20-35, at most 6 contours
        ("Basophil",
         lambda: 120 < nucleus_hue < 140 and 20 < cyto_hue < 35 and n_lobes <= 6),
        # Neutrophil: 5-6 contours, nucleus ratio around 0.3, nucleus hue 150-165
        ("Neutrophil",
         lambda: 5 <= n_lobes <= 6 and 0.28 <= ratio <= 0.32 and 150 < nucleus_hue < 165),
    )

    for label, matches in rules:
        if matches():
            return label

    return "Unknown"


In [19]:
import cv2
import numpy as np

def extract_features(image_path, size=(128, 128)):
    """Load an image and compute the features consumed by ``classify_cell``.

    The image is resized to ``size``, a mask is built by inverting an Otsu
    threshold of the blurred grayscale image, and shape / mean-HSV features
    are derived from that mask.

    Parameters
    ----------
    image_path : str
        Path handed to ``cv2.imread``.
    size : (int, int), optional
        ``(width, height)`` the image is resized to before analysis. The
        nucleus ratio is computed against ``width * height``. Defaults to
        ``(128, 128)``, the size previously hard-coded here.

    Returns
    -------
    dict
        Keys 'Circularity', 'Estimated_Lobes', 'Nucleus/Cell_Ratio',
        'HSV_H_Nucleus', 'HSV_S_Nucleus', 'HSV_H_Cytoplasm', 'HSV_S_Cytoplasm'.
        Shape-related values are 0 when the mask contains no contour.

    Raises
    ------
    ValueError
        If ``size`` is not a pair of positive numbers.
    FileNotFoundError
        If ``cv2.imread`` could not load ``image_path``.
    """
    def shape_features(contour):
        # Returns (area, aspect_ratio, extent, solidity, circularity) for one
        # contour; solidity/circularity fall back to 0 on a zero denominator.
        area = cv2.contourArea(contour)
        perimeter = cv2.arcLength(contour, True)
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = float(w) / h
        extent = area / (w * h)
        hull = cv2.convexHull(contour)
        hull_area = cv2.contourArea(hull)
        solidity = area / hull_area if hull_area != 0 else 0
        circularity = (4 * np.pi * area / (perimeter ** 2)) if perimeter != 0 else 0
        return area, aspect_ratio, extent, solidity, circularity

    width, height = size
    if width <= 0 or height <= 0:
        raise ValueError(f"size must be a positive (width, height) pair, got {size!r}")

    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"❌ Could not load image at path: {image_path}")

    img = cv2.resize(img, (width, height))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # Otsu threshold on the blurred grayscale image, inverted so the pixels at
    # or below the threshold (the darker ones) become the nucleus mask.
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    _, binary = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    mask_nucleus = cv2.bitwise_not(binary)

    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; index -2 is the contour list on all.
    contours = cv2.findContours(mask_nucleus, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    nucleus_area = 0
    aspect_ratio = extent = solidity = circularity = lobes_estimate = 0

    if contours:
        # Shape features come from the largest contour only; the lobe estimate
        # is the number of external contours found in the mask.
        largest = max(contours, key=cv2.contourArea)
        nucleus_area, aspect_ratio, extent, solidity, circularity = shape_features(largest)
        lobes_estimate = len(contours)

    # Everything outside the nucleus mask is treated as cytoplasm.
    mask_cytoplasm = cv2.bitwise_not(mask_nucleus)
    mean_hsv_nucleus = cv2.mean(hsv, mask_nucleus)[:3]
    mean_hsv_cytoplasm = cv2.mean(hsv, mask_cytoplasm)[:3]

    # Ratio is relative to the whole resized image, not to a segmented cell.
    total_area = width * height
    nucleus_ratio = nucleus_area / total_area

    return {
        'Circularity': circularity,
        'Estimated_Lobes': lobes_estimate,
        'Nucleus/Cell_Ratio': nucleus_ratio,
        'HSV_H_Nucleus': mean_hsv_nucleus[0],
        'HSV_S_Nucleus': mean_hsv_nucleus[1],
        'HSV_H_Cytoplasm': mean_hsv_cytoplasm[0],
        'HSV_S_Cytoplasm': mean_hsv_cytoplasm[1]
    }

def classify_cell(features):
    """Label a cell from its feature dict using fixed threshold rules.

    ``features`` must contain the keys 'Circularity', 'Estimated_Lobes',
    'Nucleus/Cell_Ratio', 'HSV_H_Nucleus', 'HSV_H_Cytoplasm' and
    'HSV_S_Cytoplasm' (a missing key raises KeyError).

    The checks run in the order Monocyte, Lymphocyte, Eosinophil, Basophil,
    Neutrophil; the first one that matches decides the label and "Unknown"
    is returned when none does.
    """
    roundness = features['Circularity']
    n_lobes = features['Estimated_Lobes']
    ratio = features['Nucleus/Cell_Ratio']
    nucleus_hue = features['HSV_H_Nucleus']
    cyto_hue = features['HSV_H_Cytoplasm']
    cyto_sat = features['HSV_S_Cytoplasm']

    # Monocyte: high circularity, many contours, low nucleus ratio
    if roundness > 0.35 and n_lobes >= 12 and ratio < 0.25:
        return "Monocyte"

    # Lymphocyte: many contours, high nucleus ratio, lower circularity
    if n_lobes >= 9 and ratio >= 0.35 and roundness < 0.25:
        return "Lymphocyte"

    # Eosinophil: low-hue, saturated cytoplasm together with a high nucleus hue
    if cyto_hue < 30 and cyto_sat > 55 and nucleus_hue > 160:
        return "Eosinophil"

    # Basophil: nucleus hue 120-140, cytoplasm hue 25-45, few contours, low circularity
    if 120 < nucleus_hue < 140 and 25 < cyto_hue < 45 and n_lobes <= 7 and roundness < 0.3:
        return "Basophil"

    # Neutrophil: 5-7 contours, nucleus ratio around 0.3, nucleus hue 130-160
    if 5 <= n_lobes <= 7 and 0.28 <= ratio <= 0.34 and 130 < nucleus_hue < 160:
        return "Neutrophil"

    return "Unknown"

# 🔍 Usage example: extract features from one training image, show them,
# then classify with the rule-based classifier.
test_image_path = r"E:\2. DIP\2. LAB\pythonProject\Projects\Assignment 2\wbc_data\Train\Eosinophil\Eosinophil_1.jpg"

features = extract_features(test_image_path)
print(features)

predicted_class = classify_cell(features)
print(f"Predicted Class: {predicted_class}")


{'Circularity': 0.22316277687124306, 'Estimated_Lobes': 7, 'Nucleus/Cell_Ratio': 0.366424560546875, 'HSV_H_Nucleus': 166.85745582006717, 'HSV_S_Nucleus': 100.3328465021177, 'HSV_H_Cytoplasm': 17.055048757470903, 'HSV_S_Cytoplasm': 65.13505295166195}
Predicted Class: Eosinophil


In [20]:
import math

def euclidean_distance(f1, f2, keys):
    """Return the Euclidean distance between two feature mappings.

    Only the entries named in ``keys`` take part; both ``f1`` and ``f2`` must
    provide every one of them. No scaling is applied, so features with large
    numeric ranges contribute proportionally more to the distance.
    """
    squared_total = 0
    for name in keys:
        squared_total = squared_total + (f1[name] - f2[name]) ** 2
    return math.sqrt(squared_total)

def classify_cell_with_fallback(features, class_means):
    """Classify with the rule set first, then by nearest class mean.

    ``classify_cell(features)`` is used as-is whenever it yields anything
    other than "Unknown". Otherwise the label of the entry in ``class_means``
    (label -> feature mapping) with the strictly smallest Euclidean distance
    to ``features`` is returned; on equal distances the earlier entry wins,
    and "Unknown" is returned when ``class_means`` is empty.
    """
    rule_based = classify_cell(features)
    if rule_based == "Unknown":
        # Fallback: similarity-based classification over these six features
        compare_on = ['Circularity', 'Estimated_Lobes', 'Nucleus/Cell_Ratio',
                      'HSV_H_Nucleus', 'HSV_H_Cytoplasm', 'HSV_S_Cytoplasm']

        best_label, best_dist = "Unknown", float('inf')
        for candidate, centroid in class_means.items():
            candidate_dist = euclidean_distance(features, centroid, compare_on)
            if candidate_dist < best_dist:
                best_label, best_dist = candidate, candidate_dist
        return best_label

    return rule_based
# Pre-compute mean feature vectors from your DataFrame
import pandas as pd

# Features that take part in the nearest-mean fallback.
selected_keys = ['Circularity', 'Estimated_Lobes', 'Nucleus/Cell_Ratio',
                 'HSV_H_Nucleus', 'HSV_H_Cytoplasm', 'HSV_S_Cytoplasm']

# Reload the feature table written by the feature-extraction cell.
df = pd.read_csv("wbc_features_updated.csv")

# One mean feature vector (as a plain dict) per class label, in order of
# first appearance in the table.
mean_vectors = {}
for cls in df['Class'].unique():
    row = df.loc[df['Class'] == cls, selected_keys].mean()
    mean_vectors[cls] = row.to_dict()


In [21]:
# Re-run the single test image through the classifier with the
# nearest-mean fallback enabled.
features = extract_features(test_image_path)
predicted = classify_cell_with_fallback(features, mean_vectors)

print(f"Predicted Class: {predicted}")


Predicted Class: Eosinophil


In [22]:
import os
import pandas as pd

def process_test_folder(test_dir, class_means):
    """Classify every image below ``test_dir`` and tabulate the outcome.

    The directory tree is walked recursively; the name of the folder that
    directly contains an image is taken as its true label.

    Parameters
    ----------
    test_dir : str
        Root folder of the test set.
    class_means : dict
        Mapping label -> mean feature dict, forwarded to
        ``classify_cell_with_fallback``.

    Returns
    -------
    pandas.DataFrame
        One row per successfully processed image with the columns 'Image',
        'True_Class', 'Predicted_Class', 'Match' ("Yes"/"No") followed by the
        extracted features. When no image was processed the frame is empty
        but still carries the four base columns, so code that reads e.g.
        ``['Match']`` keeps working.

    Notes
    -----
    Images that fail to process are reported with ``print`` and skipped
    (best effort) rather than aborting the whole run. Folders and files are
    visited in sorted order so the row order is reproducible across runs
    and platforms.
    """
    supported_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')
    base_columns = ["Image", "True_Class", "Predicted_Class", "Match"]
    results = []

    for root, dirs, files in os.walk(test_dir):
        # Sorting in place steers os.walk's (top-down) descent order.
        dirs.sort()
        true_label = os.path.basename(root)  # folder name = true label

        for file in sorted(files):
            if not file.lower().endswith(supported_exts):
                continue

            full_path = os.path.join(root, file)

            try:
                features = extract_features(full_path)
                predicted = classify_cell_with_fallback(features, class_means)

                result_row = {
                    "Image": full_path,
                    "True_Class": true_label,
                    "Predicted_Class": predicted,
                    "Match": "Yes" if predicted == true_label else "No"
                }
                result_row.update(features)
                results.append(result_row)

            except Exception as e:
                print(f"⚠️ Error processing {full_path}: {e}")

    if not results:
        return pd.DataFrame(columns=base_columns)
    return pd.DataFrame(results)
# Folder holding the test set (one sub-folder per class).
test_dir = r"E:\2. DIP\2. LAB\pythonProject\Projects\Assignment 2\wbc_data\Test"

# Classify every test image; mean_vectors must already exist from the
# earlier cell that builds the per-class mean feature vectors.
df_results = process_test_folder(test_dir, class_means=mean_vectors)

# Persist the per-image results, then preview the first rows.
df_results.to_csv("wbc_test_results.csv", index=False)
print(df_results.head())
import pandas as pd

# Reload the per-image predictions saved to CSV.
df_results = pd.read_csv("wbc_test_results.csv")

# Accuracy = share of rows whose Match flag reads "yes" (case-insensitive),
# i.e. correct predictions / total predictions.
correct_predictions = df_results['Match'].str.lower().eq('yes')
accuracy = correct_predictions.mean()

# Report as a percentage with two decimals.
print("Accuracy: {:.2f}%".format(accuracy * 100))



                                               Image True_Class  \
0  E:\2. DIP\2. LAB\pythonProject\Projects\Assign...   Basophil   
1  E:\2. DIP\2. LAB\pythonProject\Projects\Assign...   Basophil   
2  E:\2. DIP\2. LAB\pythonProject\Projects\Assign...   Basophil   
3  E:\2. DIP\2. LAB\pythonProject\Projects\Assign...   Basophil   
4  E:\2. DIP\2. LAB\pythonProject\Projects\Assign...   Basophil   

  Predicted_Class Match  Circularity  Estimated_Lobes  Nucleus/Cell_Ratio  \
0        Basophil   Yes     0.248795                6            0.332581   
1      Lymphocyte    No     0.455469               10            0.177399   
2        Basophil   Yes     0.276540                6            0.375732   
3      Neutrophil    No     0.466351               10            0.274994   
4      Lymphocyte    No     0.375573               11            0.367950   

   HSV_H_Nucleus  HSV_S_Nucleus  HSV_H_Cytoplasm  HSV_S_Cytoplasm  
0     135.121720      89.033117        35.588284        44.694822 

In [25]:
# Bare expression: the notebook displays the column index of the results table.
df_results.columns

Index(['Image', 'True_Class', 'Predicted_Class', 'Match', 'Circularity',
       'Estimated_Lobes', 'Nucleus/Cell_Ratio', 'HSV_H_Nucleus',
       'HSV_S_Nucleus', 'HSV_H_Cytoplasm', 'HSV_S_Cytoplasm'],
      dtype='object')

In [26]:
import pandas as pd

# Read the saved test results back in.
df_results = pd.read_csv("wbc_test_results.csv")

# A prediction counts as correct when its Match flag, lower-cased, is "yes";
# the mean of that boolean column is the fraction of correct predictions.
match_flags = df_results['Match'].str.lower()
correct_predictions = match_flags == 'yes'
accuracy = correct_predictions.mean()

# Print the accuracy as a percentage.
percent = accuracy * 100
print(f"Accuracy: {percent:.2f}%")


Accuracy: 30.80%
